From a480ee45348b345b654a9e0a3e9822e5cdd23b3e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 14 Aug 2023 22:15:32 -0700 Subject: [PATCH 001/165] Initial work to resync Metal work on master Does not yet link - async sockets missing implementation Migrating Metal-cpp over to CMake management Also rolling Metal-cpp up to 13.3 SDK. --- CMakeLists.txt | 5 + Sources/Plasma/Apps/plClient/CMakeLists.txt | 22 +- .../Plasma/Apps/plClient/Mac-Cocoa/main.mm | 41 + Sources/Plasma/Apps/plClient/plClient.cpp | 8 + Sources/Plasma/CoreLib/plQuality.h | 1 + Sources/Plasma/FeatureLib/CMakeLists.txt | 3 + .../Plasma/FeatureLib/inc/pfAllCreatables.h | 4 + .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 53 + .../ShaderSrc/BiasNormals.metal | 123 + .../ShaderSrc/CompCosines.metal | 128 + .../ShaderSrc/FixedPipelineShaders.metal | 660 +++ .../pfMetalPipeline/ShaderSrc/Grass.metal | 86 + .../ShaderSrc/PlateShaders.metal | 102 + .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 165 + .../pfMetalPipeline/ShaderSrc/ShaderVertex.h | 77 + .../ShaderSrc/WaveDecEnv.metal | 402 ++ .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 474 ++ .../pfMetalPipelineCreatable.h | 49 + .../pfMetalPipeline/plMetalDevice.cpp | 1145 +++++ .../pfMetalPipeline/plMetalDevice.h | 246 + .../pfMetalPipeline/plMetalDeviceRef.cpp | 83 + .../pfMetalPipeline/plMetalDeviceRef.h | 293 ++ .../pfMetalPipeline/plMetalDeviceRefs.cpp | 164 + .../pfMetalPipeline/plMetalFragmentShader.cpp | 81 + .../pfMetalPipeline/plMetalFragmentShader.h | 66 + .../plMetalMaterialShaderRef.cpp | 586 +++ .../plMetalMaterialShaderRef.h | 110 + .../pfMetalPipeline/plMetalPipeline.cpp | 4343 +++++++++++++++++ .../pfMetalPipeline/plMetalPipeline.h | 238 + .../pfMetalPipeline/plMetalPlateManager.cpp | 146 + .../pfMetalPipeline/plMetalPlateManager.h | 74 + .../pfMetalPipeline/plMetalShader.cpp | 84 + .../pfMetalPipeline/plMetalShader.h | 76 + .../pfMetalPipeline/plMetalVertexShader.cpp | 81 + .../pfMetalPipeline/plMetalVertexShader.h | 66 + 
.../Plasma/NucleusLib/inc/plCreatableIndex.h | 1 + .../PubUtilLib/plAudio/plAudioSystem.cpp | 4 + .../plAudio/plAudioSystem_Private.h | 4 + .../PubUtilLib/plSurface/hsGMaterial.cpp | 9 + .../Plasma/PubUtilLib/plSurface/hsGMaterial.h | 10 + 40 files changed, 10312 insertions(+), 1 deletion(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/pfMetalPipelineCreatable.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp create mode 100644 
Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 69cbc039b0..cffa6c7e76 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -147,6 +147,7 @@ endif(PLASMA_EXTERNAL_RELEASE) # Pipeline Renderers cmake_dependent_option(PLASMA_PIPELINE_DX "Enable DirectX rendering pipeline?" ON "DirectX_FOUND" OFF) cmake_dependent_option(PLASMA_PIPELINE_GL "Enable OpenGL rendering pipeline?" ON "TARGET epoxy::epoxy" OFF) +cmake_dependent_option(PLASMA_PIPELINE_METAL "Enable Metal rendering pipeline?" ON "APPLE" OFF) if(PLASMA_PIPELINE_DX) add_definitions(-DPLASMA_PIPELINE_DX) @@ -156,6 +157,10 @@ if(PLASMA_PIPELINE_GL) add_definitions(-DPLASMA_PIPELINE_GL) endif(PLASMA_PIPELINE_GL) +if(PLASMA_PIPELINE_METAL) + add_definitions(-DPLASMA_PIPELINE_METAL) +endif(PLASMA_PIPELINE_METAL) + # Allow us to disable certain parts of the build option(PLASMA_BUILD_CLIENT "Do we want to build plClient?" ON) cmake_dependent_option(PLASMA_BUILD_MAX_PLUGIN "Do we want to build the 3ds Max plugin?" 
OFF "TARGET 3dsm" OFF) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 72b9ace7a8..8ddb5b0481 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -110,13 +110,28 @@ elseif(APPLE) Mac-Cocoa/PLSServerStatus.h ) list(APPEND plClient_RESOURCES - Mac-Cocoa/Assets.xcassets Mac-Cocoa/banner.png Mac-Cocoa/banner@2x.png Mac-Cocoa/MainMenu.xib Mac-Cocoa/PLSLoginWindowController.xib Mac-Cocoa/PLSPatcherWindowController.xib + ) + #shaders need to be compiled as part of the app + #this could change in the future, but for now the Metal code expects the library to be compiled in the app + set(plClient_SHADERS + ../../FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal ) + set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) + source_group("Metal Shaders" FILES ${plClient_SHADERS}) + set(plClient_SOURCES ${plClient_SOURCES} ${plClient_SHADERS}) + include_directories("../../FeatureLib/pfMetalPipeline/metal-cpp" + "../../FeatureLib/pfMetalPipeline/ShaderSrc") else() list(APPEND plClient_SOURCES main.cpp @@ -160,6 +175,10 @@ if(APPLE) XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES" ) target_compile_options(plClient PRIVATE -fobjc-arc) + target_sources(plClient PRIVATE Mac-Cocoa/Assets.xcassets) + set_source_files_properties(Mac-Cocoa/Assets.xcassets ${RESOURCES} PROPERTIES + MACOSX_PACKAGE_LOCATION Resources + ) if(PLASMA_APPLE_DEVELOPMENT_TEAM_ID) set_target_properties(plClient PROPERTIES @@ -229,6 +248,7 @@ target_link_libraries( pfPython $<$:pfDXPipeline> $<$:pfGLPipeline> 
+ $<$:pfMetalPipeline> CURL::libcurl "$<$:-framework Cocoa>" "$<$:-framework QuartzCore>" diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index a861f94403..64d21f4475 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -42,6 +42,12 @@ // System Frameworks #import +#if PLASMA_PIPELINE_GL +#import +#endif +#if PLASMA_PIPELINE_METAL +#import +#endif #import // Cocoa client @@ -63,7 +69,13 @@ #include "plCmdParser.h" #include "pfConsoleCore/pfConsoleEngine.h" #include "pfGameGUIMgr/pfGameGUIMgr.h" +#if PLASMA_PIPELINE_GL +#include "pfGLPipeline/plGLPipeline.h" +#endif #include "plInputCore/plInputDevice.h" +#if PLASMA_PIPELINE_METAL +#include "pfMetalPipeline/plMetalPipeline.h" +#endif #include "plMessage/plDisplayScaleChangedMsg.h" #include "plMessageBox/hsMessageBox.h" #include "plNetClient/plNetClientMgr.h" @@ -456,6 +468,35 @@ - (void)startClient gClient.SetClientWindow((hsWindowHndl)(__bridge void*)self.window); gClient.SetClientDisplay((hsWindowHndl)NULL); +#if PLASMA_PIPELINE_METAL + plMetalPipeline *pipeline = (plMetalPipeline *)gClient->GetPipeline(); + pipeline->currentDrawableCallback = [self] { + id< CAMetalDrawable > drawable; + drawable = [((CAMetalLayer *) _renderLayer) nextDrawable]; + CA::MetalDrawable * mtlDrawable = ( __bridge CA::MetalDrawable* ) drawable; + mtlDrawable->retain(); + return mtlDrawable; + }; + + NSString *productTitle = [NSString stringWithCString:plProduct::LongName().c_str() encoding:NSUTF8StringEncoding]; + id device = ((CAMetalLayer *) self.window.contentView.layer).device; +#ifdef HS_DEBUGGING + [self.window setTitle:[NSString stringWithFormat:@"%@ - %@, %@", + productTitle, +#ifdef __arm64__ + @"ARM64", +#else + @"x86_64", +#endif + device.name]]; +#else + [self.window setTitle:productTitle]; +#endif + +#else + [self.window setTitle:[NSString stringWithCString:plProduct::LongName().c_str() 
encoding:NSUTF8StringEncoding]]; +#endif + if (!gClient) { exit(0); } diff --git a/Sources/Plasma/Apps/plClient/plClient.cpp b/Sources/Plasma/Apps/plClient/plClient.cpp index 9f6b741dd9..64915060b5 100644 --- a/Sources/Plasma/Apps/plClient/plClient.cpp +++ b/Sources/Plasma/Apps/plClient/plClient.cpp @@ -148,6 +148,9 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #ifdef PLASMA_PIPELINE_GL #include "pfGLPipeline/plGLPipeline.h" #endif +#ifdef PLASMA_PIPELINE_METAL + #include "pfMetalPipeline/plMetalPipeline.h" +#endif #include "pfJournalBook/pfJournalBook.h" #include "pfLocalizationMgr/pfLocalizationMgr.h" #include "pfMoviePlayer/plMoviePlayer.h" @@ -428,6 +431,11 @@ plPipeline* plClient::ICreatePipeline(hsWindowHndl disp, hsWindowHndl hWnd, cons if (renderer == hsG3DDeviceSelector::kDevTypeOpenGL) return new plGLPipeline(disp, hWnd, devMode); #endif + +#ifdef PLASMA_PIPELINE_METAL + //if (renderer == hsG3DDeviceSelector::kDevTypeOpenGL) + return new plMetalPipeline(disp, hWnd, devMode); +#endif return new plNullPipeline(disp, hWnd, devMode); } diff --git a/Sources/Plasma/CoreLib/plQuality.h b/Sources/Plasma/CoreLib/plQuality.h index 7d2f3b5bec..b0ae10f40e 100644 --- a/Sources/Plasma/CoreLib/plQuality.h +++ b/Sources/Plasma/CoreLib/plQuality.h @@ -64,6 +64,7 @@ class plQuality friend class plClient; friend class plQualitySlider; friend class plDXPipeline; + friend class plMetalPipeline; // Set by the app according to user preference. 
static void SetQuality(int q); diff --git a/Sources/Plasma/FeatureLib/CMakeLists.txt b/Sources/Plasma/FeatureLib/CMakeLists.txt index 345d0cc065..2586232833 100644 --- a/Sources/Plasma/FeatureLib/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/CMakeLists.txt @@ -23,6 +23,9 @@ add_subdirectory(pfGameScoreMgr) if(PLASMA_PIPELINE_GL) add_subdirectory(pfGLPipeline) endif() +if(PLASMA_PIPELINE_METAL) + add_subdirectory(pfMetalPipeline) +endif() add_subdirectory(pfJournalBook) add_subdirectory(pfLocalizationMgr) add_subdirectory(pfMessage) diff --git a/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h b/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h index a42818ffa8..6b7079516e 100644 --- a/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h +++ b/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h @@ -59,6 +59,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #ifdef PLASMA_PIPELINE_GL #include "pfGLPipeline/pfGLPipelineCreatable.h" +#endif + +#ifdef PLASMA_PIPELINE_METAL + #include "pfMetalPipeline/pfMetalPipelineCreatable.h" #endif #include "pfJournalBook/pfJournalBookCreatable.h" diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt new file mode 100644 index 0000000000..bd336a210a --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -0,0 +1,53 @@ +include(FetchContent) + +FetchContent_Declare( + metalcpp + DOWNLOAD_EXTRACT_TIMESTAMP TRUE + URL_HASH SHA256=0afd87ca851465191ae4e3980aa036c7e9e02fe32e7c760ac1a74244aae6023b + URL "https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip" +) + +FetchContent_MakeAvailable(metalcpp) + +set(pfMetalPipeline_SOURCES + plMetalDevice.cpp + plMetalDeviceRefs.cpp + plMetalMaterialShaderRef.cpp + plMetalPipeline.cpp + plMetalPlateManager.cpp + plMetalShader.cpp + plMetalFragmentShader.cpp + plMetalVertexShader.cpp +) + +set(pfMetalPipeline_HEADERS + plMetalDevice.h + plMetalDeviceRef.h +
plMetalMaterialShaderRef.h + plMetalPipeline.h + plMetalPlateManager.h + plMetalShader.h + plMetalFragmentShader.h + plMetalVertexShader.h + ShaderSrc/ShaderTypes.h + ShaderSrc/ShaderVertex.h + pfMetalPipelineCreatable.h +) + +plasma_library(pfMetalPipeline SOURCES ${pfMetalPipeline_SOURCES} ${pfMetalPipeline_HEADERS}) +target_link_libraries(pfMetalPipeline + PUBLIC + CoreLib + pnNucleusInc + plPipeline + "-framework Metal" + PRIVATE + plStatusLog + INTERFACE + pnFactory +) +target_include_directories(pfMetalPipeline PUBLIC ${metalcpp_SOURCE_DIR}) +target_include_directories(pfMetalPipeline PUBLIC "ShaderSrc") + +source_group("Source Files" FILES ${pfMetalPipeline_SOURCES}) +source_group("Header Files" FILES ${pfMetalPipeline_HEADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal new file mode 100644 index 0000000000..1961896856 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal @@ -0,0 +1,123 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct { + float4 TexU0; + float4 TexV0; + + float4 TexU1; + float4 TexV1; + + float4 Numbers; + + float4 ScaleBias; +} vs_BiasNormalsUniforms; + +typedef struct { + float4 position [[position]]; + float4 texCoord0; + float4 texCoord1; + //not actually colors, just emulating the registers + float4 color1; + float4 color2; +} vs_BiasNormalsOut; + +vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[stage_in]], + constant vs_BiasNormalsUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + vs_BiasNormalsOut out; + + out.position = float4(in.position, 1.0); + + out.texCoord0 = float4( + dot(float4(in.texCoord1, 1.0), uniforms.TexU0), + dot(float4(in.texCoord1, 1.0), uniforms.TexV0), + 0, + 1 + ); + + out.texCoord1 = float4( + dot(float4(in.texCoord1, 1.0), uniforms.TexU1), + dot(float4(in.texCoord1, 1.0), uniforms.TexV1), + 0, + 1 + ); + + out.color1 = 
uniforms.ScaleBias.xxzz; + out.color2 = uniforms.ScaleBias.yyzz; + + return out; +} + +fragment float4 ps_BiasNormals(vs_BiasNormalsOut in [[stage_in]], + texture2d t0 [[ texture(0) ]], + texture2d t1 [[ texture(1) ]]) { + // Sum two normal maps and apply a scale and bias. + // t0 is sampled with texCoord0 and t1 with + // texCoord1, the two coordinate sets produced by + // vs_BiasNormals. + // Both samples are re-centered by 0.5 first: + // rgb = (t0.rgb - 0.5) + (t1.rgb - 0.5) + // a = t0.a + t1.a + // + // color1 and color2 are not real colors; the + // vertex stage fills them from ScaleBias: + // color1 = ScaleBias.xxzz + // color2 = ScaleBias.yyzz + // + // Final rgb = rgb * color1.rgb + color2.rgb + + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + float4 sample1 = t0.sample(colorSampler, in.texCoord0.xy); + float4 sample2 = t1.sample(colorSampler, in.texCoord1.xy); + float4 out = float4(sample1.rgb - 0.5 + sample2.rgb - 0.5, + sample1.a + sample2.a); + out.rgb = (out.rgb * in.color1.rgb) + in.color2.rgb; + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal new file mode 100644 index 0000000000..33d220e491 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal @@ -0,0 +1,128 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct { + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; +} vs_CompCosinesUniforms; + +typedef struct { + float4 position [[position]]; + float4 texCoord0; + float4 texCoord1; + float4 texCoord2; + float4 texCoord3; +} vs_CompCosinesnInOut; + +vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[stage_in]], + constant vs_CompCosinesUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + vs_CompCosinesnInOut out; + + out.position = float4(in.position, 1.0); + + float4 texCoord = float4(0, 0, 0, 1); + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c0); + out.texCoord0 = texCoord; + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c1); + out.texCoord1 = texCoord; + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c2); + out.texCoord2 = texCoord; + texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c3); + out.texCoord3 = texCoord; + + return out; +} + +typedef struct { + float4 c0; + float4 c1; + float4 c2; + float4 c3; + float4 c4; + float4 c5; +} ps_CompCosinesUniforms; + +fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[stage_in]], + texture2d t0 [[ texture(0) ]], + texture2d t1 [[ texture(1) ]], + texture2d t2 [[ texture(2) ]], + texture2d t3 [[ texture(3) ]], + constant ps_CompCosinesUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + // Composite the cosines together. + // Input map is cosine(pix) for each of + // the 4 waves. + // + // The constants are set up so: + // Nx = -freq * amp * dirX * cos(pix); + // Ny = -freq * amp * dirY * cos(pix); + // So c[i].x = -freq[i] * amp[i] * dirX[i] + // etc. + // All textures are: + // (r,g,b,a) = (cos(), cos(), 1, 1) + // + // So c[0].z = 1, but all other c[i].z = 0 + // Note also the c4 used for biasing back at the end. 
+ + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + + float4 out = 2 * (t0.sample(colorSampler, fract(in.texCoord0.xy)) - 0.5) * uniforms.c0; + out += 2 * (t1.sample(colorSampler, fract(in.texCoord1.xy)) - 0.5) * uniforms.c1; + out += 2 * (t2.sample(colorSampler, fract(in.texCoord2.xy)) - 0.5) * uniforms.c2; + out += 2 * (t3.sample(colorSampler, fract(in.texCoord3.xy)) - 0.5) * uniforms.c3; + // Now bias it back into range [0..1] for output. + out *= uniforms.c4; + out += uniforms.c5; + out.b = 1.0; + out.a = 1.0; + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal new file mode 100644 index 0000000000..9ed3370dd7 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -0,0 +1,660 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + + +#include +using namespace metal; +// File for Metal kernel and shader functions + +#include +#include + +// Including header shared between this Metal shader code and Swift/C code executing Metal API commands +#include "ShaderVertex.h" +#include "ShaderTypes.h" + +//copying this direction from hsGMatState because I am a horrible person but we can't import the header here in since it includes a lot of class stuff. +//FIXME: Come up with something better. +enum hsGMatMiscFlags { + kMiscWireFrame = 0x1, // dev (running out of bits) + kMiscDrawMeshOutlines = 0x2, // dev, currently unimplemented + kMiscTwoSided = 0x4, // view,dev + kMiscDrawAsSplats = 0x8, // dev? 
bwt + kMiscAdjustPlane = 0x10, + kMiscAdjustCylinder = 0x20, + kMiscAdjustSphere = 0x40, + kMiscAdjust = kMiscAdjustPlane | kMiscAdjustCylinder| kMiscAdjustSphere, + kMiscTroubledLoner = 0x80, + kMiscBindSkip = 0x100, + kMiscBindMask = 0x200, + kMiscBindNext = 0x400, + kMiscLightMap = 0x800, + kMiscUseReflectionXform = 0x1000, // Use the calculated reflection environment + // texture transform instead of layer->GetTransform() + kMiscPerspProjection = 0x2000, + kMiscOrthoProjection = 0x4000, + kMiscProjection = kMiscPerspProjection | kMiscOrthoProjection, + + kMiscRestartPassHere = 0x8000, // Tells pipeline to start a new pass beginning with this layer + // Kinda like troubledLoner, but only cuts off lower layers, not + // higher ones (kMiscBindNext sometimes does this by implication) + + kMiscBumpLayer = 0x10000, + kMiscBumpDu = 0x20000, + kMiscBumpDv = 0x40000, + kMiscBumpDw = 0x80000, + kMiscBumpChans = kMiscBumpDu | kMiscBumpDv | kMiscBumpDw, + + kMiscNoShadowAlpha = 0x100000, + kMiscUseRefractionXform = 0x200000, // Use a refraction-like hack. + kMiscCam2Screen = 0x400000, // Expects tex coords to be XYZ in camera space. Does a cam to screen (not NDC) projection + // and swaps Z with W, so that the texture projection can produce projected 2D screen coordinates. 
+ + kAllMiscFlags = 0xffffffff +}; + +enum hsGMatBlendFlags { + kBlendTest = 0x1, // dev + // Rest of blends are mutually exclusive + kBlendAlpha = 0x2, // dev + kBlendMult = 0x4, // dev + kBlendAdd = 0x8, // dev + kBlendAddColorTimesAlpha = 0x10, // dev + kBlendAntiAlias = 0x20, + kBlendDetail = 0x40, + kBlendNoColor = 0x80, // dev + kBlendMADD = 0x100, + kBlendDot3 = 0x200, + kBlendAddSigned = 0x400, + kBlendAddSigned2X = 0x800, + kBlendMask = kBlendAlpha + | kBlendMult + | kBlendAdd + | kBlendAddColorTimesAlpha + | kBlendDetail + | kBlendMADD + | kBlendDot3 + | kBlendAddSigned + | kBlendAddSigned2X, + kBlendInvertAlpha = 0x1000, // dev + kBlendInvertColor = 0x2000, // dev + kBlendAlphaMult = 0x4000, + kBlendAlphaAdd = 0x8000, + kBlendNoVtxAlpha = 0x10000, + kBlendNoTexColor = 0x20000, + kBlendNoTexAlpha = 0x40000, + kBlendInvertVtxAlpha = 0x80000, // Invert ONLY the vertex alpha source + kBlendAlphaAlways = 0x100000, // Alpha test always passes (even for alpha=0). + kBlendInvertFinalColor = 0x200000, + kBlendInvertFinalAlpha = 0x400000, + kBlendEnvBumpNext = 0x800000, + kBlendSubtract = 0x1000000, + kBlendRevSubtract = 0x2000000, + kBlendAlphaTestHigh = 0x4000000, + kBlendAlphaPremultiplied = 0x8000000 +}; + +enum plUVWSrcModifiers { + kUVWPassThru = 0x00000000, + kUVWIdxMask = 0x0000ffff, + kUVWNormal = 0x00010000, + kUVWPosition = 0x00020000, + kUVWReflect = 0x00030000 +}; + +using namespace metal; + +typedef struct { + array, 8> textures [[ texture(FragmentShaderArgumentAttributeTextures) ]]; + array, 8> cubicTextures [[ texture(FragmentShaderArgumentAttributeCubicTextures) ]]; + constant float4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; + constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; +} FragmentShaderArguments; + +inline float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const 
matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); + +typedef struct +{ + float4 position [[position]]; + float3 texCoord1 [[function_constant(hasLayer1)]]; + float3 texCoord2 [[function_constant(hasLayer2)]]; + float3 texCoord3 [[function_constant(hasLayer3)]]; + float3 texCoord4 [[function_constant(hasLayer4)]]; + float3 texCoord5 [[function_constant(hasLayer5)]]; + float3 texCoord6 [[function_constant(hasLayer6)]]; + float3 texCoord7 [[function_constant(hasLayer7)]]; + float3 texCoord8 [[function_constant(hasLayer8)]]; + //float4 normal; + half4 vtxColor; + half4 fogColor; + //float4 vCamNormal; +} ColorInOut; + +vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + uint v_id [[vertex_id]]) +{ + ColorInOut out; + //we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. + float4 inColor = float4(in.color.b, in.color.g, in.color.r, in.color.a) / float4(255.0f); + + float4 MAmbient = mix(inColor, uniforms.ambientCol, uniforms.ambientSrc); + float4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); + float4 MEmissive = mix(inColor, uniforms.emissiveCol, uniforms.emissiveSrc); + float4 MSpecular = mix(inColor, uniforms.specularCol, uniforms.specularSrc); + + float4 LAmbient = float4(0.0, 0.0, 0.0, 0.0); + float4 LDiffuse = float4(0.0, 0.0, 0.0, 0.0); + + float3 Ndirection = normalize(uniforms.worldToLocalMatrix * float4(in.normal, 0.0)).xyz; + + for (uint i = 0; i < 8; i++) { + plMetalShaderLightSource lightSource = uniforms.lampSources[i]; + if(lightSource.scale == 0) + continue; + + float attenuation; + float3 direction; + + if (lightSource.position.w == 0.0) { + // Directional Light with no attenuation + direction = -(lightSource.direction).xyz; + attenuation = 1.0; + } else { + // Omni Light in all directions + float3 v2l = lightSource.position.xyz - float3(uniforms.localToWorldMatrix * float4(in.position, 1.0)); 
+ float distance = length(v2l); + direction = normalize(v2l); + + attenuation = 1.0 / (lightSource.constAtten + lightSource.linAtten * distance + lightSource.quadAtten * distance * distance); + + if (uniforms.lampSources[i].spotProps.x > 0.0) { + // Spot Light with cone falloff + float a = dot(direction.xyz, normalize(-lightSource.direction).xyz); + float theta = lightSource.spotProps.y; + float phi = lightSource.spotProps.z; + float result = pow(saturate((a - phi) / (theta - phi)), lightSource.spotProps.x); // saturate first: pow() of a negative base (vertex outside the outer cone) is not well defined + + attenuation *= clamp(result, 0.0, 1.0); + } + } + + LAmbient.rgb = LAmbient.rgb + attenuation * (uniforms.lampSources[i].ambient.rgb * uniforms.lampSources[i].scale); + float3 dotResult = dot(Ndirection, direction); + LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (uniforms.lampSources[i].diffuse.rgb * uniforms.lampSources[i].scale) * max(0.0, dotResult) * attenuation; + } + + float4 ambient = clamp(float4(MAmbient) * (uniforms.globalAmb + LAmbient), 0.0, 1.0); + float4 diffuse = clamp(LDiffuse, 0.0, 1.0); + float4 material = clamp(ambient + diffuse + float4(MEmissive), 0.0, 1.0); + + out.vtxColor = half4(float4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a))); + + float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); + + //Fog: both modes measure distance with the xyz of the camera-space position (w must not contribute) + out.fogColor.a = 1.0; + if (uniforms.fogExponential > 0) { + out.fogColor.a = exp(-pow(uniforms.fogValues.y * length(vCamPosition.xyz), uniforms.fogValues.x)); + } else { + if (uniforms.fogValues.y > 0.0) { + float start = uniforms.fogValues.x; + float end = uniforms.fogValues.y; + out.fogColor.a = (end - length(vCamPosition.xyz)) / (end - start); + } + } + out.fogColor.rgb = half3(uniforms.fogColor); + + float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); + + if(hasLayer1) + out.texCoord1 = sampleLocation(&in.texCoord1, 
uniforms.uvTransforms[0].transform, uniforms.uvTransforms[0].UVWSrc, uniforms.uvTransforms[0].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer2) + out.texCoord2 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[1].transform, uniforms.uvTransforms[1].UVWSrc, uniforms.uvTransforms[1].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer3) + out.texCoord3 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[2].transform, uniforms.uvTransforms[2].UVWSrc, uniforms.uvTransforms[2].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer4) + out.texCoord4 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[3].transform, uniforms.uvTransforms[3].UVWSrc, uniforms.uvTransforms[3].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer5) + out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[4].transform, uniforms.uvTransforms[4].UVWSrc, uniforms.uvTransforms[4].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer6) + out.texCoord6 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[5].transform, uniforms.uvTransforms[5].UVWSrc, uniforms.uvTransforms[5].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer7) + out.texCoord7 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[6].transform, uniforms.uvTransforms[6].UVWSrc, uniforms.uvTransforms[6].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + if(hasLayer8) + out.texCoord8 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[7].transform, uniforms.uvTransforms[7].UVWSrc, uniforms.uvTransforms[7].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + + out.position = uniforms.projectionMatrix * vCamPosition; + + return out; +} + +inline void 
blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags); +inline void blend(half4 srcSample, thread half4 &destSample, uint32_t blendFlags); + +inline float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix) { + //Note: If we want to require newer versions of Metal/newer hardware we could pass function pointers instead of doing these ifs. + if (flags & (kMiscUseReflectionXform | kMiscUseRefractionXform)) { + matrix = camToWorldMatrix; + matrix[3][0] = matrix[3][1] = matrix[3][2] = 0; + + // This is just a rotation about X of Pi/2 (y = z, z = -y), + // followed by flipping Z to reflect back towards us (z = -z). + + // swap mat[1][0] and mat[2][0] + float temp; + temp = matrix[0][1]; + matrix[0][1] = matrix[0][2]; + matrix[0][2] = temp; + + // swap mat[1][1] and mat[2][1] + temp = matrix[1][1]; + matrix[1][1] = matrix[1][2]; + matrix[1][2] = temp; + + // swap mat[1][2] and mat[2][2] + temp = matrix[2][1]; + matrix[2][1] = matrix[2][2]; + matrix[2][2] = temp; + + if (flags & kMiscUseRefractionXform) { + // Same as reflection, but then matrix = matrix * scaleMatNegateZ. + + // mat[0][2] = -mat[0][2]; + matrix[2][0] = -matrix[2][0]; + + // mat[1][2] = -mat[1][2]; + matrix[2][1] = -matrix[2][1]; + + // mat[2][2] = -mat[2][2]; + matrix[2][2] = -matrix[2][2]; + } + } + else if (flags & kMiscCam2Screen) { + + matrix_float4x4 translationMatrix = matrix_float4x4(1.0); + // mat.MakeScaleMat(hsVector3 camScale(0.5f, -0.5f, 1.f)); + translationMatrix[0][0] = 0.5f; + translationMatrix[1][1] = -0.5f; + + matrix_float4x4 scaleMatrix = matrix_float4x4(1.0); + + // hsVector3 camTrans(0.5f, 0.5f, 0.f); + scaleMatrix[3][0] = 0.5f; + scaleMatrix[3][1] = -0.5f; + + matrix = scaleMatrix * translationMatrix; + + // The scale and trans move us from NDC to Screen space. 
We need to swap + // the Z and W coordinates so that the texture projection will divide by W + // and give us projected 2D coordinates. + float temp; + + // swap mat[2][2] and mat[3][2] + temp = matrix[2][2]; + matrix[2][2] = matrix[2][3]; + matrix[2][3] = temp; + + // swap mat[2][3] and mat[3][3] + temp = matrix[3][2]; + matrix[3][2] = matrix[3][3]; + matrix[3][3] = temp; + + // Multiply by the projection matrix + matrix = matrix * projectionMatrix; + } else if (flags & kMiscProjection) { + matrix_float4x4 cam2World = camToWorldMatrix; + if( !(UVWSrc & kUVWPosition) ) { + cam2World.columns[3][0] = 0; + cam2World.columns[3][1] = 0; + cam2World.columns[3][2] = 0; + } + + matrix = matrix * cam2World; + } + + float4 sampleCoord; + + switch (UVWSrc) { + case kUVWNormal: + { + sampleCoord = matrix * normal; + } + break; + case kUVWPosition: + { + sampleCoord = matrix * camPosition; + } + break; + case kUVWReflect: + { + sampleCoord = matrix * reflect(normalize(camPosition), normalize(normal)); + } + break; + default: + { + int index = UVWSrc & 0x0f; + sampleCoord = matrix * float4(texCoords[index], 1.0); + } + break; + } + return sampleCoord.xyz; +} + +half4 blendLayer(plFragmentShaderLayer layer, float3 sampleCoord, half4 color, texture2d texture, thread texturecube *cubicTexture) { + + constexpr sampler colorSamplers[] = { + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::clamp_to_edge, + t_address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::repeat, + t_address::clamp_to_edge), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_edge), + + }; + + ushort passType = layer.passType; + + if(passType == PassTypeColor) { + return color; + } else { + + if (layer.miscFlags & kMiscPerspProjection) { + sampleCoord.xy = sampleCoord.xy / sampleCoord.z; + } + 
+ int colorSamplerIndex = layer.sampleType; + //do the actual sample + if(passType == PassTypeTexture) { + texture2d colorMap = texture; + return colorMap.sample(colorSamplers[colorSamplerIndex], sampleCoord.xy); + } else if(passType == PassTypeCubicTexture) { + thread texturecube *colorMap = cubicTexture; + return colorMap->sample(colorSamplers[colorSamplerIndex], sampleCoord.xyz); + } else { + return half4(0); + } + } +} + +fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + FragmentShaderArguments fragmentShaderArgs) +{ + half4 currentColor = in.vtxColor; + + uint i = 0; + for(i=i; i< num_layers; i++) { + plFragmentShaderLayer layer = fragmentShaderArgs.bufferedUniforms->layers[i]; + + thread texturecube* cubicTexture = &(fragmentShaderArgs.cubicTextures[i]); + half4 color = blendLayer(layer, (&in.texCoord1)[i], currentColor, fragmentShaderArgs.textures[i], cubicTexture); + if(i==0) { + blendFirst(color, currentColor, layer.blendMode); + } else { + blend(color, currentColor, layer.blendMode); + } + } + + currentColor = half4(in.vtxColor.rgb, 1.0) * currentColor; + currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb * currentColor.a, 1.0f - clamp((float)in.fogColor.a, 0.0f, 1.0f)); + + if (currentColor.a < fragmentShaderArgs.bufferedUniforms->alphaThreshold) { discard_fragment(); } + + return currentColor; +} + +inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { + // Invert the sampled color: 1.0 - texture.rgb (same 1.0 scale as the alpha inversion in blend()) + if (blendFlags & kBlendInvertColor) { + srcSample.rgb = 1.0 - srcSample.rgb; + } + + // Leave fCurrColor null if we are blending without texture color + if (!(blendFlags & kBlendNoTexColor)) { + destSample.rgb = srcSample.rgb; + } + + if (blendFlags & kBlendInvertAlpha) { + // 1.0 - texture.a + srcSample.a = 1.0 - srcSample.a; + } + + if (!(blendFlags & kBlendNoTexAlpha)) { + // Vertex alpha * base texture alpha + destSample.a 
= destSample.a * srcSample.a; + } +} + +inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { + // Invert the sampled color: 1.0 - texture.rgb (same 1.0 scale as the alpha inversion below) + if (blendFlags & kBlendInvertColor) { + srcSample.rgb = 1.0 - srcSample.rgb; + } + + switch (blendFlags & kBlendMask) + { + + case kBlendAddColorTimesAlpha: + //hsAssert(false, "Blend mode unsupported on upper layers"); + break; + + case kBlendAlpha: + { + if (!(blendFlags & kBlendNoTexColor)) { + if (blendFlags & kBlendInvertAlpha) { + // color = texture.rgb + (texture.a * prev) + destSample.rgb = (srcSample.rgb + (srcSample.a * destSample.rgb)); + } else { + // color = mix(prev, texture.rgb, texture.a) + destSample.rgb = mix(destSample.rgb, srcSample.rgb, srcSample.a); + } + } + + if (blendFlags & kBlendInvertAlpha) { + // 1.0 - texture.a + srcSample.a = 1.0 - srcSample.a; + } else { + // texture.a + srcSample.a = srcSample.a; + } + + if (blendFlags & kBlendAlphaAdd) { + // alpha = alphaVal + prev + destSample.a = srcSample.a + destSample.a; + } else if (blendFlags & kBlendAlphaMult) { + // alpha = alphaVal * prev + destSample.a = srcSample.a * destSample.a; + } + break; + } + + case kBlendAdd: + { + // color = texture.rgb + prev + destSample.rgb = srcSample.rgb + destSample.rgb; + + break; + } + + case kBlendMult: + { + // color = color * prev + destSample.rgb = srcSample.rgb * destSample.rgb; + break; + } + + case kBlendDot3: + { + // color = (color.r * prev.r + color.g * prev.g + color.b * prev.b) + destSample = dot(srcSample.rgb, destSample.rgb); + break; + } + + case kBlendAddSigned: + { + // color = color + prev - 0.5 + destSample.rgb = srcSample.rgb + destSample.rgb - 0.5; + break; + } + + case kBlendAddSigned2X: + { + // color = (color + prev - 0.5) << 1 + // Note: using CALL here for multiplication to ensure parentheses + destSample.rgb = 2 * (srcSample.rgb + destSample.rgb - 0.5); + break; + } + + case 0: + { + // color = texture.rgb + destSample.rgb = srcSample.rgb; + break; + } 
+ } +} + +fragment float4 shadowFragmentShader(ColorInOut in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + texture2d colorMap [[ texture(0) ]]) +{ + constexpr sampler colorSamplers[] = { + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::clamp_to_edge, + t_address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::repeat, + t_address::clamp_to_edge), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_edge), + + }; + + //D3DTTFF_COUNT3, D3DTSS_TCI_CAMERASPACEPOSITION + ushort4 currentColor = colorMap.sample(colorSamplers[3], in.texCoord1.xy); + + return float4(1.0, 1.0, 1.0, float(currentColor.a)/255.0f); +} + + +fragment float4 shadowCastFragmentShader(ColorInOut in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + texture2d texture [[ texture(16) ]], + texture2d LUT [[ texture(17) ]], + constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(BufferIndexShadowCastFragArgBuffer) ]], + FragmentShaderArguments layers, + constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowAlphaSrc) ]]) +{ + + constexpr sampler colorSamplers[] = { + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::clamp_to_edge, + t_address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::repeat, + t_address::clamp_to_edge), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_edge), + + }; + + float3 sampleCoords = in.texCoord1; + if(fragmentUniforms.pointLightCast) { + sampleCoords.xy /= sampleCoords.z; + } + float4 currentColor = 
float4(texture.sample(colorSamplers[3], sampleCoords.xy)); + currentColor.rgb *= float3(in.vtxColor.rgb); + + float3 LUTCoords = in.texCoord2; + float4 LUTColor = float4(LUT.sample(colorSamplers[3], LUTCoords.xy))/255.0f; + + currentColor.rgb = (1.0 - LUTColor.rgb) * currentColor.rgb; + currentColor.a = LUTColor.a - currentColor.a; + + if(alphaSrc != -1) { + half4 layerColor = blendLayer(layers.bufferedUniforms->layers[alphaSrc], in.texCoord3, half4(layers.colors[alphaSrc]), layers.textures[alphaSrc], nullptr); + + currentColor.rgb *= layerColor.a; + currentColor.rgb *= uniforms.diffuseCol.a; + } + + //alpha blend goes here + + if(currentColor.a <= 0.0) + discard_fragment(); + + return currentColor; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal new file mode 100644 index 0000000000..145c6c8ba7 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal @@ -0,0 +1,86 @@ +// +// GrassShader.metal +// plGLClient +// +// Created by Colin Cornaby on 1/1/22. 
+// + +#include +using namespace metal; + +#include "ShaderVertex.h" + +//ignoring the int and pi constants here and using what's built in +//but reserving space for them in the buffer +typedef struct { + matrix_float4x4 Local2NDC; + float4 intConstants; + float4 time; + float4 piConstants; + float4 sinConstants; + float4 waveDistortX; + float4 waveDistortY; + float4 waveDistortZ; + float4 waveDirX; + float4 waveDirY; + float4 waveSpeed; +} vs_GrassUniforms; + +typedef struct { + float4 position [[position]]; + float4 color; + float4 texCoord; +} vs_GrassInOut; + +vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], + constant vs_GrassUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + vs_GrassInOut out; + + float4 r0 = (in.position.x * uniforms.waveDirX) + (in.position.y * uniforms.waveDirY); // position.x pairs with waveDirX, position.y with waveDirY + + r0 += (uniforms.time.x * uniforms.waveSpeed); // scale by speed and add to X,Y input + r0 = fract(r0); + + r0 = (r0 - 0.5) * M_PI_F * 2; + + float4 pow2 = r0 * r0; + float4 pow3 = pow2 * r0; + float4 pow5 = pow2 * pow3; + float4 pow7 = pow2 * pow5; + float4 pow9 = pow2 * pow7; + + r0 += pow3 * uniforms.sinConstants.x; + r0 += pow5 * uniforms.sinConstants.y; + r0 += pow7 * uniforms.sinConstants.z; + r0 += pow9 * uniforms.sinConstants.w; + + float3 offset = float3( + dot(r0, uniforms.waveDistortX), + dot(r0, uniforms.waveDistortY), + dot(r0, uniforms.waveDistortZ) + ); + + offset *= (2.0 * (1.0 - in.texCoord1.y)); // mult by Y tex coord. 
So the waves only affect the top verts + + float4 position = float4(in.position.xyz + offset, 1); + out.position = position * uniforms.Local2NDC; + + out.color = float4(in.color.r, in.color.g, in.color.b, in.color.a) / 255.0; + out.texCoord = float4(in.texCoord1, 0.0); + + return out; +} + +fragment float4 ps_GrassShader(vs_GrassInOut in [[stage_in]], + texture2d t0 [[ texture(0) ]]) { + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + + float4 out = t0.sample(colorSampler, in.texCoord.xy); + out *= in.color; + if(out.a <= 0.1) + discard_fragment(); + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal new file mode 100644 index 0000000000..5e38fca6eb --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal @@ -0,0 +1,102 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; +// File for Metal kernel and shader functions + +#include +#include + +// Including header shared between this Metal shader code and Swift/C code executing Metal API commands +#import "ShaderTypes.h" + +using namespace metal; + +typedef struct { + array, 8> textures [[ id(FragmentShaderArgumentAttributeTextures) ]]; + array, 8> cubicTextures [[ id(FragmentShaderArgumentAttributeCubicTextures) ]]; + array colors [[ id(FragmentShaderArgumentAttributeColors) ]]; + plMetalFragmentShaderArgumentBuffer uniforms [[ id(FragmentShaderArgumentAttributeUniforms) ]]; +} FragmentShaderArguments; + +typedef struct +{ + float2 position [[attribute(VertexAttributePosition)]]; + float3 texCoord [[attribute(VertexAttributeTexcoord)]]; +} PlateVertex; + +typedef struct +{ + float4 position [[position]]; + float3 texCoord; + float4 normal; +} ColorInOut; + +vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]], + 
constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + uint v_id [[vertex_id]]) +{ + ColorInOut out; + + float4 position = float4(in.position, 0.0, 1.0); + position = uniforms.projectionMatrix * position; + out.position = (uniforms.localToWorldMatrix * position); + out.texCoord = (float4(in.texCoord, 1.0) * uniforms.uvTransforms[0].transform).xyz; + out.normal = float4(0.0, 0.0, 1.0, 0.0); + + return out; +} + +fragment float4 fragmentShader(ColorInOut in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + device plMetalFragmentShaderArgumentBuffer & fragmentShaderArgs [[ buffer(BufferIndexFragArgBuffer) ]], + texture2d colorMap [[ texture(Texture) ]]) +{ + constexpr sampler colorSampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear); + + half4 colorSample = colorMap.sample(colorSampler, in.texCoord.xy); + + return float4(colorSample); +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h new file mode 100644 index 0000000000..b807bc51d0 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -0,0 +1,165 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef ShaderTypes_h +#define ShaderTypes_h + +#include + +enum plMetalShaderArgumentIndex +{ + //Texture is a legacy argument for the simpler plate shader + Texture = 1, + BufferIndexState = 2, + BufferIndexUniforms = 3, + BufferIndexFragArgBuffer = 5, + BufferIndexShadowCastFragArgBuffer = 4 +}; + +enum plMetalVertexShaderUniform +{ + VertexAttributePosition = 0, + VertexAttributeTexcoord = 1, + VertexAttributeNormal = 9, + VertexAttributeUVCount = 10, + VertexAttributeColor = 11 +}; + +enum plMetalFragmentShaderUniform +{ + FragmentShaderArgumentShadowAlphaSrc = 8, + FragmentShaderArgumentPiggybackLayers = 9, + FragmentShaderArgumentNumPiggybackLayers = 10, + FragmentShaderOverrideLayer = 11 +}; + +enum plMetalFunctionConstant +{ + FunctionConstantNumUVs = 0, + FunctionConstantNumLayers = 1 +}; + +enum plMetalLayerPassType +{ + PassTypeTexture = 1, + PassTypeCubicTexture = 2, + PassTypeColor = 3 +}; + +struct plFragmentShaderLayer { 
+ ushort passType; + uint uvIndex; + uint32_t blendMode; + uint32_t miscFlags; + short sampleType; +}; + +struct plMetalFragmentShaderArgumentBuffer { + ushort layerCount; + float alphaThreshold; + plFragmentShaderLayer layers[8]; +}; + +struct plMetalShadowCastFragmentShaderArgumentBuffer { + bool pointLightCast; +}; + +enum plMetalFragmentShaderTextures { + FragmentShaderArgumentAttributeTextures = 0, + FragmentShaderArgumentAttributeCubicTextures = 8, + FragmentShaderArgumentAttributeColors = 16, + FragmentShaderArgumentAttributeUniforms = 32 +}; + +struct plMetalShaderLightSource { + simd::float4 position; + simd::float4 ambient; + simd::float4 diffuse; + simd::float4 specular; + simd::float3 direction; + simd::float4 spotProps; // (falloff, theta, phi) + float constAtten; + float linAtten; + float quadAtten; + float scale; +}; + +typedef struct +{ + uint UVWSrc; + uint flags; + matrix_float4x4 transform; +} UVOutDescriptor; + +typedef struct +{ + //transformation + matrix_float4x4 projectionMatrix; + matrix_float4x4 localToWorldMatrix; + matrix_float4x4 worldToLocalMatrix; + matrix_float4x4 cameraToWorldMatrix; + matrix_float4x4 worldToCameraMatrix; + + //lighting + simd::float4 globalAmb; + simd::float4 ambientCol; + float ambientSrc; + simd::float4 diffuseCol; + float diffuseSrc; + simd::float4 emissiveCol; + float emissiveSrc; + simd::float4 specularCol; + float specularSrc; + bool invVtxAlpha; + + uint fogExponential; + simd::float2 fogValues; + simd::float3 fogColor; + + plMetalShaderLightSource lampSources[8]; + + uint numUVSrcs; + UVOutDescriptor uvTransforms[8]; +} VertexUniforms; + +#endif /* ShaderTypes_h */ + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h new file mode 100644 index 0000000000..8859dc60f3 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h @@ -0,0 +1,77 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - 
MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "ShaderTypes.h" + +constant ushort num_uvs [[ function_constant(FunctionConstantNumUVs) ]]; +constant bool hasTexture1 = num_uvs > 0; +constant bool hasTexture2 = num_uvs > 1; +constant bool hasTexture3 = num_uvs > 2; +constant bool hasTexture4 = num_uvs > 3; +constant bool hasTexture5 = num_uvs > 4; +constant bool hasTexture6 = num_uvs > 5; +constant bool hasTexture7 = num_uvs > 6; +constant bool hasTexture8 = num_uvs > 7; + +constant ushort num_layers [[ function_constant(FunctionConstantNumLayers) ]]; +constant bool hasLayer1 = num_layers > 0; +constant bool hasLayer2 = num_layers > 1; +constant bool hasLayer3 = num_layers > 2; +constant bool hasLayer4 = num_layers > 3; +constant bool hasLayer5 = num_layers > 4; +constant bool hasLayer6 = num_layers > 5; +constant bool hasLayer7 = num_layers > 6; +constant bool hasLayer8 = num_layers > 7; + +typedef struct +{ + float3 position [[attribute(VertexAttributePosition)]]; + float3 normal [[attribute(VertexAttributeNormal)]]; + float3 texCoord1 [[attribute(VertexAttributeTexcoord), function_constant(hasTexture1)]]; + float3 texCoord2 [[attribute(VertexAttributeTexcoord+1), function_constant(hasTexture2)]]; + float3 texCoord3 [[attribute(VertexAttributeTexcoord+2), function_constant(hasTexture3)]]; + float3 texCoord4 [[attribute(VertexAttributeTexcoord+3), function_constant(hasTexture4)]]; + float3 texCoord5 [[attribute(VertexAttributeTexcoord+4), function_constant(hasTexture5)]]; + float3 texCoord6 [[attribute(VertexAttributeTexcoord+5), function_constant(hasTexture6)]]; + float3 texCoord7 [[attribute(VertexAttributeTexcoord+6), function_constant(hasTexture7)]]; + float3 texCoord8 [[attribute(VertexAttributeTexcoord+7), function_constant(hasTexture8)]]; + uchar4 color [[attribute(VertexAttributeColor)]]; +} Vertex; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal new file mode 100644 index 0000000000..cb995d7023 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -0,0 +1,402 @@ +// +// WaveDecEnv.metal +// plGLClient +// +// Created by Colin Cornaby on 1/2/22. +// + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct { + matrix_float4x4 WorldToNDC; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 Scrunch; // UNUSED + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 Tex0_Row0; + float4 Tex0_Row1; + float4 Tex1_Row0; + float4 Tex1_Row1; + float4 L2WRow0; + float4 L2WRow1; + float4 L2WRow2; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 Bias; // Only using one slot + float4 MatColor; + float4 CameraPos; // Only used by DecalEnv + float4 EnvAdjust; // Only used by DecalEnv + float4 FogSet; + float4 QADirX; + float4 QADirY; + + float4 DirXW; // Only used by DecalEnv + float4 DirYW; // Only used by DecalEnv + float4 WK; // Only used by DecalEnv + float4 DirXSqKW; // Only used by DecalEnv + float4 DirXDirYKW; // Only used by DecalEnv + float4 DirYSqKW; // Only used by DecalEnv +} vs_WaveDecEnv7Uniforms; + +typedef struct { + float4 position [[position]]; + float4 c1; + float4 texCoord0; + float4 texCoord1; + float4 texCoord2; + float4 texCoord3; + float fog; +} vs_WaveDecEnv7InOut; + +vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], + constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + vs_WaveDecEnv7InOut out; + + // Store our input position in world space in r6 + float4 worldPosition = float4(0); + worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); + worldPosition.y = dot(float4(in.position, 1.0), uniforms.L2WRow1); + worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); + // 
Fill out our w (m4x3 doesn't touch w). + worldPosition.w = 1.0; + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = illumination + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. + // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. + // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. + // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c22 = waterlevel + offset + // c23 = (maxAtten - minAtten) / depthFalloff + // c24 = minAtten. 
+ // And in particular: + // c22.w = waterlevel + // c23.w = 1.f; + // c24.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. + float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance += uniforms.DirectionY * worldPosition.yyyy; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + distance += uniforms.PiConsts.zzzz; + distance *= 1.0f / uniforms.PiConsts.wwww; + + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= uniforms.PiConsts.wwww; + // dist += -kPi; + distance += uniforms.PiConsts.zzzz; + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). + float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above). 
+ float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = max(filter, uniforms.NumericConsts.xxxx); + filter = min(filter, uniforms.NumericConsts.zzzz); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = float4(0); + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= kAmplitude.xyzw; // Combine? + //METAL NOTE: cosDist is now r7 + cosDist *= uniforms.Amplitude; + // cosDist *= filter; + cosDist *= filter; + // Pos = (in.x + S, in.y + R, r6.z) + // S = sum(k Dir.x A cos()) + // R = sum(k Dir.y A cos()) + // c30 = k Dir.x A + // c31 = k Dir.y A + // S = sum(cosDist * c30); + worldPosition.xy += float2( + dot(cosDist, uniforms.QADirX), + dot(cosDist, uniforms.QADirY) + ); + + // Bias our vert up a bit to compensate for precision errors. + // In particular, our filter coefficients are coming in as + // interpolated bytes, so there's bound to be a lot of slop + // from that. We've got a free slot in c25.x, so we'll use that. + // A better implementation would be to bias and scale our screen + // vert, effectively pushing the vert toward the camera without + // actually moving it, but this is easier and might work just + // as well. 
+ worldPosition.z += uniforms.Bias.x; + + // + // // Transform position to screen + // + // + out.position = worldPosition * uniforms.WorldToNDC; + out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; + + // Now onto texture coordinate generation. + // + // First is the usual texture transform + out.texCoord0 = float4( + dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0), + dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1), + uniforms.NumericConsts.zz + ); + + // Calculate our basis vectors as input into our tex3x3vspec + // First we get our basis set off our surface. This is + // Okay, here we go: + // W == sum(k w Dir.x^2 A sin()) x + // V == sum(k w Dir.x Dir.y A sin()) x + // U == sum(k w Dir.y^2 A sin()) x + // + // T == sum(A sin()) + // + // S == sum(k Dir.x A cos()) + // R == sum(k Dir.y A cos()) + // + // Q == sum(k w A cos()) x + // + // M == sum(A cos()) + // + // P == sum(w Dir.x A cos()) x + // N == sum(w Dir.y A cos()) x + // + // Then: + // Pos = (in.x + S, in.y + R, waterheight + T) // Already done above. + // + // Bin = (1 - W, -V, P) + // Tan = (-V, 1 - U, N) + // Nor = (-P, -N, 1 - Q) + // + // The matrix + // |Bx, Tx, Nx| + // |By, Ty, Ny| + // |Bz, Tz, Nz| + // is surface2world, but we still need to fold in + // texture2surface. We'll go with the generalized + // (not assuming a flat surface) partials of dPos/dU and dPos/dV + // as coming in as uv coords v8 and v9. 
+ // Then, if r5 = v8 X v9, then texture to surface is + // |v8.x, v9.x, r5.x| + // |v8.y, v9.y, r5.y| + // |v8.z, v9.z, r5.z| + // + // So, let's say we calc 3 vectors, + // r7 = (Bx, Tx, Nx) + // r8 = (By, Ty, Ny) + // r9 = (Bz, Tz, Nz) + // + // Then surface2world * texture2surface = + // |r7 dot v8, r7 dot v9, r7 dot r5| + // |r8 dot v8, r8 dot v9, r8 dot r5| + // |r9 dot v8, r9 dot v9, r9 dot r5| + // + // We will need r5 as v8 X v9 + + float4 r7 = float4(in.texCoord2, 1.0); + float4 r5 = float4(0); + r5.xyz = r7.yzx * in.texCoord3; + r5.xyz = (r7.zxy * -in.texCoord3) + r5.xyz; + + // Okay, r1 currently has the vector of cosines, and r2 has vector of sines. + // Everything will want that times amplitude, so go ahead and fold that in. + cosDist *= uniforms.Phase; + + r7.x = dot(sinDist, -uniforms.DirXSqKW); + r7.y = dot(sinDist, -uniforms.DirXDirYKW); + r7.z = dot(cosDist, -uniforms.DirXW); + r7.x += uniforms.NumericConsts.z; + + float4 r8 = float4(0); + r8.x = dot(sinDist, -uniforms.DirXDirYKW); + r8.y = dot(sinDist, -uniforms.DirYSqKW); + r8.z = dot(cosDist, uniforms.DirYW); + r8.y = r8.y + uniforms.NumericConsts.z; + + float4 r9 = float4(0); + r9.z = dot(cosDist, -uniforms.WK); + r9.x = -r7.z; + r9.y = -r8.z; + r9.z = r9.z + uniforms.NumericConsts.z; + + // Okay, got everything we need, construct r1-3 as surface2world*texture2surface. + float4 r1, r2, r3 = float4(0); + r1.x = dot(r7, float4(in.texCoord1, 1.0)); + r1.y = dot(r7, float4(in.texCoord2, 1.0)); + r1.z = dot(r7, r5); + + r2.x = dot(r8, float4(in.texCoord1, 1.0)); + r2.y = dot(r8, float4(in.texCoord2, 1.0)); + r2.z = dot(r8, r5); + + r3.x = dot(r9, float4(in.texCoord1, 1.0)); + r3.y = dot(r9, float4(in.texCoord2, 1.0)); + r3.z = dot(r9, r5); + + // Following section is debug only to skip the per-vert tangent space axes. 
/// Fragment stage for the decal water environment effect.
///
/// Perturbs the interpolated basis rows with the tangent-space normal map and
/// uses the resulting normal to reflect the eye ray into the environment cube map.
///
/// Inputs (as read by this function):
///   in.texCoord0.xy    - UV used to sample the normal map.
///   in.texCoord1..3    - .xyz is one basis row each, .w carries one component
///                        of the eye ray.
///   in.c1              - attenuating color applied to the reflection.
///
/// NOTE(review): vs_WaveDecEnv7, as far as it is visible in this file, writes its
/// three basis rows to texCoord0..2 while this stage reads texCoord1..3 and samples
/// the normal map with texCoord0 - confirm the intended output mapping.
fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]],
                              texture2d<float> normalMap [[ texture(0) ]],
                              texturecube<float> environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3) ]])
{
    // Very similar to ps_WaveFixed. Only the final coloring is different.
    constexpr sampler colorSampler = sampler(mip_filter::linear,
                                             mag_filter::linear,
                                             min_filter::linear,
                                             address::repeat);

    const float4 normalSample = normalMap.sample(colorSampler, in.texCoord0.xy);

    // _bx2 expansion: shift [0..1] -> [-1..1]. The subtraction has to happen
    // before the scale (the previous "2 * s - 0.5" produced [-0.5..1.5]).
    const float3 t0 = 2 * (normalSample.rgb - 0.5);

    // Only .xyz of each row belongs to the basis; .w is the eye ray and must
    // not take part in the dot product (matches ps_WaveFixed).
    const float3 N = float3(dot(in.texCoord1.xyz, t0),
                            dot(in.texCoord2.xyz, t0),
                            dot(in.texCoord3.xyz, t0));
    const float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w);

    // Reflection of E about N without requiring N to be normalized
    // (equivalent of the texm3x3vspec lookup).
    //float3 coord = reflect(E, N);
    const float3 coord = 2 * (dot(N, E) / dot(N, N)) * N - E;

    // The reflected environment value is attenuated per-vertex through c1.
    float4 out = float4(environmentMap.sample(colorSampler, coord));
    out.rgb = (out.rgb * in.c1.rgb);

    // NOTE(review): alpha is kept numerically identical to the previous
    // implementation (red channel of the old "2 * s - 0.5" value times the red
    // channel of c1). Using the alpha channels may have been intended - confirm
    // against the original pixel shader before changing.
    out.a = (2 * normalSample.x - 0.5) * in.c1.x;
    return out;
}
/// Uniform block read by vs_WaveFixedFin7 through buffer(BufferIndexUniforms).
///
/// Member order and types define the buffer layout and must not be changed
/// (presumably mirrored by the CPU-side upload code - verify before editing).
/// Wave parameters are stored one wave per vector lane.
struct vs_WaveFixedFin7Uniforms {
    matrix_float4x4 WorldToNDC;     // world position -> clip space
    float4 WaterTint;               // written to c2; .w also scales c1 alpha
    float4 Frequency;               // per-wave frequency
    float4 Phase;                   // per-wave phase
    float4 Amplitude;               // per-wave amplitude
    float4 DirectionX;              // per-wave direction, x
    float4 DirectionY;              // per-wave direction, y
    float4 UVScale;                 // .x scales position.xy into texCoord0
    float4 SpecAtten;               // .x offset, .y scale, .z final scale of the distance attenuation
    float4 Scrunch;                 // not read by vs_WaveFixedFin7
    float4 SinConsts;               // .y/.z/.w: sine polynomial coefficients
    float4 CosConsts;               // .y/.z/.w: cosine polynomial coefficients
    float4 PiConsts;                // .z used as Pi, .w used as TwoPi
    float4 NumericConsts;           // .x used as 0, .z used as 1, .y as a scale factor
    float4 CameraPos;               // camera position in world space
    float4 WindRot;                 // not read by vs_WaveFixedFin7
    float4 EnvAdjust;               // .xyz = F (envCenter - camPos), .w = G (F^2 - R^2)
    float4 EnvTint;                 // multiplied into c1
    float4 LocalToWorldRow1;
    float4 LocalToWorldRow2;
    float4 LocalToWorldRow3;
    float4 Lengths;                 // per-wave length used for edge-length filtering
    float4 WaterLevel;              // water level + per-channel offset; .w = water table height
    float4 DepthFalloff;            // (maxAtten - minAtten) / depthFalloff
    float4 MinAtten;                // minimum attenuation per channel
    float4 FogSet;                  // fog = (clip.w + .x) * .y
    float4 DirXK;
    float4 DirYK;
    float4 DirXW;
    float4 DirYW;
    float4 WK;
    float4 DirXSqKW;
    float4 DirXDirYKW;
    float4 DirYSqKW;
};
/// Vertex stage for the water surface ("WaveFixedFin7").
///
/// Displaces the vertex by a sum of sine waves (one wave per vector lane),
/// builds the transposed surface basis used by the per-pixel environment
/// lookup, and computes the attenuating color.
///
/// Outputs:
///   position     - clip-space position (worldPosition * WorldToNDC).
///   fog          - (position.w + FogSet.x) * FogSet.y.
///   texCoord0    - normal map UV (position.xy * UVScale.x).
///   texCoord1..3 - scaled basis rows in .xyz, eye ray component in .w.
///                  Note r2 goes to texCoord3 and r3 to texCoord2.
///   c1 / c2      - attenuating color and water tint.
vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]],
                                               constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]])
{
    vs_WaveFixedFin7InOut out;

    // Per the filter formula below, NumericConsts.x is the 0 and .z the 1 constant.
    const float zero = uniforms.NumericConsts.x;
    const float one = uniforms.NumericConsts.z;

    // Store our input position in world space.
    // NOTE(review): only the first three components of each LocalToWorldRow are
    // used (anything carried in .w, e.g. a translation, is dropped) and
    // "v * M" with these columns yields dot(v, (Row1[j], Row2[j], Row3[j])) -
    // confirm this matches the intended local-to-world transform.
    float3 column1 = float3(uniforms.LocalToWorldRow1[0], uniforms.LocalToWorldRow2[0], uniforms.LocalToWorldRow3[0]);
    float3 column2 = float3(uniforms.LocalToWorldRow1[1], uniforms.LocalToWorldRow2[1], uniforms.LocalToWorldRow3[1]);
    float3 column3 = float3(uniforms.LocalToWorldRow1[2], uniforms.LocalToWorldRow2[2], uniforms.LocalToWorldRow3[2]);

    matrix_float3x3 localToWorld;
    localToWorld[0] = column1;
    localToWorld[1] = column2;
    localToWorld[2] = column3;

    float4 worldPosition = float4(in.position * localToWorld, one);

    // Input diffuse color is:
    //   .r = overall transparency
    //   .g = reflection strength (transparency)
    //   .b = overall wave scaling
    //   .a = 1/(2.f * edge length)
    // So per wave filtering is:
    //   min(max((waveLen * color.aaaa) - 1, 0), 1.f);
    // A wave effect starts dying out when the wave is 4 times the sampling
    // frequency, and is completely filtered at 2 times sampling frequency.
    //
    // Transparency, reflection strength and wave scaling go to zero as the
    // water depth goes to zero, using a linear falloff:
    //   depth     = (waterlevel + offset) - worldPosition.z
    //   atten     = minAtten + depth * (maxAtten - minAtten) / depthFalloff
    // with
    //   WaterLevel   = waterlevel + offset  (WaterLevel.w = water table height)
    //   DepthFalloff = (maxAtten - minAtten) / depthFalloff
    //   MinAtten     = minAtten

    // Dot our position with our direction vectors.
    float4 distance = uniforms.DirectionX * worldPosition.xxxx;
    distance = (uniforms.DirectionY * worldPosition.yyyy) + distance;

    // dist = mad(dist, kFreq.xyzw, kPhase.xyzw);
    distance = distance * uniforms.Frequency;
    distance = distance + uniforms.Phase;

    // Now we need dist mod'd into range [-Pi..Pi]:
    //   dist = frac((dist + Pi) / TwoPi) * TwoPi - Pi
    const float4 pi = uniforms.PiConsts.zzzz;
    const float4 twoPi = uniforms.PiConsts.wwww;
    const float4 piRecip = 1.0f / twoPi;
    distance = distance + pi;
    distance *= piRecip;
    distance = fract(distance);
    distance *= twoPi;
    distance -= pi;

    // Metal's pow function does not like negative bases, so the powers are
    // built by repeated multiplication, same as the DX assembly.
    float4 pow2 = distance * distance; // r0^2
    float4 pow3 = pow2 * distance;     // r0^3
    float4 pow4 = pow2 * pow2;         // r0^4
    float4 pow5 = pow2 * pow3;         // r0^5
    float4 pow7 = pow2 * pow5;         // r0^7

    // sincos(dist, sinDist, cosDist) by polynomial:
    //   sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + r0^7 * vSin.w
    //   cos = 1  + r0^2 * vCos.y + r0^4 * vCos.z + r0^6 * vCos.w
    float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z;
    float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z;

    cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist;
    sinDist = (pow7 * uniforms.SinConsts.w) + sinDist;

    // Calc our depth based filtering (see above), clamped to [0..1].
    float4 depth = uniforms.WaterLevel - worldPosition.zzzz;
    depth *= uniforms.DepthFalloff;
    depth += uniforms.MinAtten;
    depth = clamp(depth, 0, 1);

    // Calc our edge-length filter (see above).
    float4 inColor = float4(in.color) / 255.0f;
    float4 filter = inColor.wwww * uniforms.Lengths;
    filter = max(filter, float4(zero));
    filter = min(filter, float4(one));

    // sinDist becomes the per-wave term of T = sum(Ai * sin()).
    sinDist *= filter;
    sinDist *= uniforms.Amplitude;

    // accumPos.x == wave height relative to 0
    // accumPos.y == dampened wave relative to 0
    // accumPos.z == dampened wave height in world space
    float4 accumPos = float4(0);
    accumPos.x = dot(sinDist, float4(one));
    accumPos.y = accumPos.x * depth.z;
    accumPos.z = accumPos.y + uniforms.WaterLevel.w;
    // Clamp so the wave never goes beneath ground level.
    worldPosition.z = max(worldPosition.z, accumPos.z);

    // cosDist becomes the per-wave term of M = sum(Ai * cos()).
    cosDist *= uniforms.Amplitude;
    cosDist *= filter;

    // Okay, here we go:
    // W == sum(k w Dir.x^2 A sin())
    // V == sum(k w Dir.x Dir.y A sin())
    // U == sum(k w Dir.y^2 A sin())
    //
    // T == sum(A sin())
    //
    // S == sum(k Dir.x A cos())
    // R == sum(k Dir.y A cos())
    //
    // Q == sum(k w A cos())
    //
    // M == sum(A cos())
    //
    // P == sum(w Dir.x A cos())
    // N == sum(w Dir.y A cos())
    //
    // Then:
    // Pos = (in.x + S, in.y + R, waterheight + T)
    //
    // Bin = (1 - W, -V, P)
    // Tan = (-V, 1 - U, N)
    // Nor = (-P, -N, 1 - Q)
    //
    // But we want the transpose of that to go into r1-r3
    worldPosition.x += dot(cosDist, uniforms.DirXK);
    worldPosition.y += dot(cosDist, uniforms.DirYK);

    float4 r1 = float4(0);
    float4 r2 = float4(0);
    float4 r3 = float4(0);

    r1.x = dot(sinDist, -uniforms.DirXSqKW);
    r2.x = dot(sinDist, -uniforms.DirXDirYKW);
    r3.x = dot(cosDist, uniforms.DirXW);
    r1.x = r1.x + one;

    r1.y = dot(sinDist, -uniforms.DirXDirYKW);
    r2.y = dot(sinDist, -uniforms.DirYSqKW);
    r3.y = dot(cosDist, uniforms.DirYW);
    r2.y = r2.y + one;

    r1.z = dot(cosDist, -uniforms.DirXW);
    r2.z = dot(cosDist, -uniforms.DirYW);
    r3.z = dot(sinDist, -uniforms.WK);
    r3.z = r3.z + one;

    // Calculate our normalized vector from camera to vtx.
    // r5.xyz is the direction, r5.w starts off as the distance to the camera.
    float4 r5 = worldPosition - uniforms.CameraPos;
    const float invCameraDistance = rsqrt(dot(r5.xyz, r5.xyz));
    r5 = r5 * invCameraDistance;
    r5.w = 1.0 / invCameraDistance;

    // Turn r5.w into the specular attenuation factor. It scales the x and y
    // of the normal map (through the transform bases) so that in the distance
    // the normal map is flat.
    r5.w += uniforms.SpecAtten.x;
    r5.w *= uniforms.SpecAtten.y;
    r5.w = min(r5.w, one);
    r5.w = max(r5.w, zero);
    r5.w *= r5.w; // Square it to account for perspective
    r5.w *= uniforms.SpecAtten.z;
    const float specAtten = r5.w;

    // So, our "finitized" eyeray is:
    //   camPos + D * t - envCenter = D * t - (envCenter - camPos)
    // with
    //   D = (pos - camPos) / |pos - camPos|   // normalized usual eyeray
    //   t = D dot F + sqrt((D dot F)^2 - G)
    //   F = (envCenter - camPos)              => EnvAdjust.xyz
    //   G = F^2 - R^2                         => EnvAdjust.w
    //   R = environment radius.               => unused
    //
    // This derives from the positive root of
    //   (camPos + (pos - camPos) * t - envCenter)^2 = R^2,
    // i.e. where on a sphere of radius R centered about envCenter does the
    // ray from the real camera position through this point hit.
    // https://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch01.html
    float3 eyeRay;
    {
        const float3 D = r5.xyz;
        const float3 F = uniforms.EnvAdjust.xyz;
        const float G = uniforms.EnvAdjust.w;
        const float DdotF = dot(D, F);
        // Square by multiplication: pow() is undefined for a negative base and
        // D dot F is negative whenever the ray points away from F.
        const float t = DdotF + sqrt((DdotF * DdotF) - G);
        eyeRay = (D * t) - F;
    }

    // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump.
    eyeRay = normalize(eyeRay);

    r1.w = -eyeRay.x;
    r2.w = -eyeRay.y;
    r3.w = -eyeRay.z;

    // Normalize each basis row by its xyz length. x and y are additionally
    // scaled by the specular attenuation, z is only normalized and w (the eye
    // ray component) is passed through multiplied by one. The normalized z of
    // every row is collected in r11 as the computed normal.
    float4 r11 = float4(0);
    {
        const float invLen = rsqrt(dot(r1.xyz, r1.xyz));
        const float attenuated = invLen * specAtten;
        out.texCoord1 = r1 * float4(attenuated, attenuated, invLen, one);
        r11.x = r1.z * invLen;
    }
    {
        const float invLen = rsqrt(dot(r2.xyz, r2.xyz));
        const float attenuated = invLen * specAtten;
        out.texCoord3 = r2 * float4(attenuated, attenuated, invLen, one);
        r11.y = r2.z * invLen;
    }
    {
        const float invLen = rsqrt(dot(r3.xyz, r3.xyz));
        const float attenuated = invLen * specAtten;
        out.texCoord2 = r3 * float4(attenuated, attenuated, invLen, one);
        r11.z = r3.z * invLen;
    }

    // Transform position to screen
    const float4 clipPosition = worldPosition * uniforms.WorldToNDC;
    out.fog = (clipPosition.w + uniforms.FogSet.x) * uniforms.FogSet.y;
    out.position = clipPosition;

    // Transform our uvw
    out.texCoord0 = float4(in.position.xy * uniforms.UVScale.x,
                           0, 1);

    // Questionable attenuation follows.
    // r5.xyz is the normalized vector from the camera to this point; dot its
    // negation with the computed normal (r11.w is 0, so r5.w does not contribute).
    float facing = dot(-r5, r11);
    facing = facing * inColor.z;
    float4 atten = float4(one - facing);
    atten.w += one;
    atten.w *= uniforms.NumericConsts.y;
    atten *= depth.yyyx; // HACKTESTCOLOR
    // R in the in color is the alpha value, but remember it's encoded ARGB
    atten.w *= inColor.g;
    atten.w *= uniforms.WaterTint.w;
    out.c1 = clamp(atten * uniforms.EnvTint, 0, 1);
    out.c2 = uniforms.WaterTint; // SEENORM

    return out;
}
/// Fragment stage for the water surface: per-pixel reflected lookup into the
/// environment cube map (the equivalent of texm3x3vspec).
///
/// Input:
///   texCoord0.xy    - UV of the tangent-space normal map; the sample is
///                     expanded from [0..1] to [-1..1] (_bx2).
///   texCoord1..3    - .xyz = basis row, .w = one component of the eye ray.
///   c1              - attenuating color/alpha.
///   c2              - color added after attenuation.
/// Output:
///   rgb = environment * c1 + c2, alpha = c1.a.
fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]],
                             texture2d<float> normalMap [[ texture(0) ]],
                             texturecube<float> environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3) ]])
{
    constexpr sampler colorSampler = sampler(mip_filter::linear,
                                             mag_filter::linear,
                                             min_filter::linear,
                                             address::repeat);

    // Tangent-space normal, shifted into [-1..1].
    const float3 tangentNormal = 2 * (normalMap.sample(colorSampler, in.texCoord0.xy).rgb - 0.5);

    // Transform the normal through the interpolated basis rows.
    const float3 N = float3(dot(in.texCoord1.xyz, tangentNormal),
                            dot(in.texCoord2.xyz, tangentNormal),
                            dot(in.texCoord3.xyz, tangentNormal));

    // The eye ray rides along in the .w of the three rows.
    const float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w);

    // Reflect E about N without requiring N to be normalized.
    //float3 coord = reflect(E, N);
    const float3 coord = 2*(dot(N, E) / dot(N, N))*N - E;

    float4 result = float4(environmentMap.sample(colorSampler, coord));
    result = (result * in.c1) + in.c2;
    result.a = in.c1.a;
    return result;
}
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef pfMetalPipelineCreatable_inc +#define pfMetalPipelineCreatable_inc + +#include "plMetalPipeline.h" +REGISTER_NONCREATABLE(plMetalPipeline); + +#endif // pfMetalPipelineCreatable_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp new file mode 100644 index 0000000000..9d9421f29a --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -0,0 +1,1145 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +//We need to define these once for Metal somewhere in a cpp file +#define NS_PRIVATE_IMPLEMENTATION +#define CA_PRIVATE_IMPLEMENTATION +#define MTL_PRIVATE_IMPLEMENTATION + +#include +#include "plMetalDevice.h" +#include "plMetalPipeline.h" +#include "ShaderTypes.h" + + +#include "hsThread.h" +#include "plDrawable/plGBufferGroup.h" +#include "plGImage/plMipmap.h" +#include "plGImage/plCubicEnvironmap.h" +#include "plPipeline/plRenderTarget.h" + +matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder) +{ + if (src.fFlags & hsMatrix44::kIsIdent) + { + memcpy(dst, &matrix_identity_float4x4, sizeof(float) * 16); + } + else + { + //SIMD is column major, hsMatrix44 is row major. + //We need to flip. + if(swapOrder) { + dst->columns[0][0] = src.fMap[0][0]; + dst->columns[1][0] = src.fMap[0][1]; + dst->columns[2][0] = src.fMap[0][2]; + dst->columns[3][0] = src.fMap[0][3]; + + dst->columns[0][1] = src.fMap[1][0]; + dst->columns[1][1] = src.fMap[1][1]; + dst->columns[2][1] = src.fMap[1][2]; + dst->columns[3][1] = src.fMap[1][3]; + + dst->columns[0][2] = src.fMap[2][0]; + dst->columns[1][2] = src.fMap[2][1]; + dst->columns[2][2] = src.fMap[2][2]; + dst->columns[3][2] = src.fMap[2][3]; + + dst->columns[0][3] = src.fMap[3][0]; + dst->columns[1][3] = src.fMap[3][1]; + dst->columns[2][3] = src.fMap[3][2]; + dst->columns[3][3] = src.fMap[3][3]; + } else { + memcpy(dst, &src.fMap, sizeof(matrix_float4x4)); + } + } + + return dst; +} + + +bool plMetalDevice::InitDevice() +{ + //FIXME: Should Metal adopt InitDevice like OGL? + hsAssert(0, "InitDevice not implemented for Metal rendering"); +} + +void plMetalDevice::Shutdown() +{ + //FIXME: Should Metal adopt Shutdown like OGL? 
+ hsAssert(0, "Shutdown not implemented for Metal rendering"); +} + +void plMetalDevice::SetRenderTarget(plRenderTarget *target) +{ + if(fCurrentRenderTargetCommandEncoder) { + //if we have an existing render target, submit it's commands and release it + //if we need to come back to this render target, we can always create a new render + //pass descriptor and submit more commands + fCurrentRenderTargetCommandEncoder->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; + + fCurrentOffscreenCommandBuffer->enqueue(); + fCurrentOffscreenCommandBuffer->commit(); + fCurrentOffscreenCommandBuffer->release(); + fCurrentOffscreenCommandBuffer = nil; + } + + if(target) { + plMetalRenderTargetRef *deviceTarget= (plMetalRenderTargetRef *)target->GetDeviceRef(); + fCurrentOffscreenCommandBuffer = fCommandQueue->commandBuffer(); + fCurrentOffscreenCommandBuffer->retain(); + fCurrentFragmentOutputTexture = deviceTarget->fTexture; + + MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + renderPassDescriptor->colorAttachments()->object(0)->setTexture(deviceTarget->fTexture); + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); + renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearColor.x, fClearColor.y, fClearColor.z, fClearColor.w)); + + if(deviceTarget->fDepthBuffer) { + renderPassDescriptor->depthAttachment()->setTexture(deviceTarget->fDepthBuffer); + renderPassDescriptor->depthAttachment()->setClearDepth(1.0); + renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); + renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); + fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8; + } else { + fCurrentDepthFormat = MTL::PixelFormatInvalid; + } + + 
fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); + } else { + if(!fDrawableRenderCommandEncoder) { + MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture()); + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); + renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearColor.x, fClearColor.y, fClearColor.z, fClearColor.w)); + fCurrentFragmentOutputTexture = fCurrentDrawable->texture(); + + renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); + renderPassDescriptor->depthAttachment()->setClearDepth(1.0); + renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); + renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); + fDrawableRenderCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); + } + + fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8; + } +} + +plMetalDevice::plMetalDevice() +: fErrorMsg(nullptr), + fActiveThread(hsThread::ThisThreadHash()), + fCurrentDrawable(nullptr), + fCommandQueue(nullptr), + fCurrentRenderTargetCommandEncoder(nullptr), + fDrawableRenderCommandEncoder(nullptr), + fCurrentDrawableDepthTexture(nullptr), + fCurrentFragmentOutputTexture(nullptr), + fCurrentCommandBuffer(nullptr) + { + fClearColor = {0.0, 0.0, 0.0, 1.0}; + + fMetalDevice = MTL::CreateSystemDefaultDevice(); + fCommandQueue = fMetalDevice->newCommandQueue(); + + //set up all the depth stencil states + MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); + 
depthDescriptor->setDepthWriteEnabled(true); + depthDescriptor->setLabel(NS::String::string("No Z Read", NS::UTF8StringEncoding)); + fNoZReadStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); + + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionLessEqual); + depthDescriptor->setDepthWriteEnabled(false); + depthDescriptor->setLabel(NS::String::string("No Z Write", NS::UTF8StringEncoding)); + fNoZWriteStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); + + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); + depthDescriptor->setDepthWriteEnabled(false); + depthDescriptor->setLabel(NS::String::string("No Z Read or Write", NS::UTF8StringEncoding)); + fNoZReadOrWriteStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); + + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionLessEqual); + depthDescriptor->setLabel(NS::String::string("Z Read and Write", NS::UTF8StringEncoding)); + depthDescriptor->setDepthWriteEnabled(true); + fDefaultStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); + + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionGreaterEqual); + depthDescriptor->setLabel(NS::String::string("Reverse Z", NS::UTF8StringEncoding)); + depthDescriptor->setDepthWriteEnabled(true); + fReverseZStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); + + depthDescriptor->release(); +} + +void plMetalDevice::SetViewport() { + CurrentRenderCommandEncoder()->setViewport({ (double)fPipeline->GetViewTransform().GetViewPortLeft(), + (double)fPipeline->GetViewTransform().GetViewPortTop(), + (double)fPipeline->GetViewTransform().GetViewPortWidth(), + (double)fPipeline->GetViewTransform().GetViewPortHeight(), + 0.f, 1.f }); +} + +bool plMetalDevice::BeginRender() { + if (fActiveThread == hsThread::ThisThreadHash()) { + return true; + } + + fActiveThread = hsThread::ThisThreadHash(); + + return true; +} + +static uint32_t IGetBufferFormatSize(uint8_t format) 
+{ + uint32_t size = sizeof( float ) * 6 + sizeof( uint32_t ) * 2; // Position and normal, and two packed colors + + switch (format & plGBufferGroup::kSkinWeightMask) + { + case plGBufferGroup::kSkinNoWeights: + break; + case plGBufferGroup::kSkin1Weight: + size += sizeof(float); + break; + default: + hsAssert( false, "Invalid skin weight value in IGetBufferFormatSize()" ); + } + + size += sizeof( float ) * 3 * plGBufferGroup::CalcNumUVs(format); + + return size; +} + +void plMetalDevice::SetupVertexBufferRef(plGBufferGroup *owner, uint32_t idx, plMetalDevice::VertexBufferRef *vRef) +{ + uint8_t format = owner->GetVertexFormat(); + + if (format & plGBufferGroup::kSkinIndices) { + format &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices); + format |= plGBufferGroup::kSkinNoWeights; // Should do nothing, but just in case... + vRef->SetSkinned(true); + vRef->SetVolatile(true); + } + + uint32_t vertSize = vertSize = IGetBufferFormatSize(format); // vertex stride + uint32_t numVerts = owner->GetVertBufferCount(idx); + + vRef->fOwner = owner; + vRef->fCount = numVerts; + vRef->fVertexSize = vertSize; + vRef->fFormat = format; + vRef->fRefTime = 0; + + vRef->SetDirty(true); + vRef->SetRebuiltSinceUsed(true); + vRef->fData = nullptr; + + vRef->SetVolatile(vRef->Volatile() || owner->AreVertsVolatile()); + + vRef->fIndex = idx; + + const uint32_t vertStart = owner->GetVertBufferStart(idx) * vertSize; + const uint32_t size = owner->GetVertBufferEnd(idx) * vertSize - vertStart; + + owner->SetVertexBufferRef(idx, vRef); + + hsRefCnt_SafeUnRef(vRef); +} + +void plMetalDevice::CheckStaticVertexBuffer(plMetalDevice::VertexBufferRef *vRef, plGBufferGroup *owner, uint32_t idx) +{ + hsAssert(!vRef->Volatile(), "Creating a managed vertex buffer for a volatile buffer ref"); + + if (!vRef->GetBuffer()) + { + FillVertexBufferRef(vRef, owner, idx); + + // This is currently a no op, but this would let the buffer know it can + // unload the system memory copy, since we 
have a managed version now. + owner->PurgeVertBuffer(idx); + } +} + +void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx) +{ + const uint32_t vertSize = ref->fVertexSize; + const uint32_t vertStart = group->GetVertBufferStart(idx) * vertSize; + const uint32_t size = group->GetVertBufferEnd(idx) * vertSize - vertStart; + + if(ref->GetBuffer()) { + assert(size <= ref->GetBuffer()->length()); + } + + if (!size) + { + return; + } + + ref->SetBuffer(fMetalDevice->newBuffer(size, MTL::StorageModeManaged)->autorelease()); + uint8_t* buffer = (uint8_t*) ref->GetBuffer()->contents(); + + if (ref->fData) + { + memcpy(buffer, ref->fData + vertStart, size); + } + else + { + hsAssert(0 == vertStart, "Offsets on non-interleaved data not supported"); + hsAssert(group->GetVertBufferCount(idx) * vertSize == size, "Trailing dead space on non-interleaved data not supported"); + + uint8_t* ptr = buffer; + + const uint32_t vertSmallSize = group->GetVertexLiteStride() - sizeof(hsPoint3) * 2; + uint8_t* srcVPtr = group->GetVertBufferData(idx); + plGBufferColor* const srcCPtr = group->GetColorBufferData(idx); + + const int numCells = group->GetNumCells(idx); + int i; + for (i = 0; i < numCells; i++) + { + plGBufferCell* cell = group->GetCell(idx, i); + + if (cell->fColorStart == uint32_t(-1)) + { + /// Interleaved, do straight copy + memcpy(ptr, srcVPtr + cell->fVtxStart, cell->fLength * vertSize); + ptr += cell->fLength * vertSize; + assert(size <= cell->fLength * vertSize); + } + else + { + hsStatusMessage("Non interleaved data"); + + /// Separated, gotta interleave + uint8_t* tempVPtr = srcVPtr + cell->fVtxStart; + plGBufferColor* tempCPtr = srcCPtr + cell->fColorStart; + int j; + for( j = 0; j < cell->fLength; j++ ) + { + memcpy( ptr, tempVPtr, sizeof( hsPoint3 ) * 2 ); + ptr += sizeof( hsPoint3 ) * 2; + tempVPtr += sizeof( hsPoint3 ) * 2; + + memcpy( ptr, &tempCPtr->fDiffuse, sizeof( uint32_t ) ); + ptr += sizeof( uint32_t ); + memcpy( 
ptr, &tempCPtr->fSpecular, sizeof( uint32_t ) ); + ptr += sizeof( uint32_t ); + + memcpy( ptr, tempVPtr, vertSmallSize ); + ptr += vertSmallSize; + tempVPtr += vertSmallSize; + tempCPtr++; + } + } + } + + hsAssert((ptr - buffer) == size, "Didn't fill the buffer?"); + } + + /// Unlock and clean up + ref->SetRebuiltSinceUsed(true); + ref->SetDirty(false); +} + +void plMetalDevice::FillVolatileVertexBufferRef(plMetalDevice::VertexBufferRef *ref, plGBufferGroup *group, uint32_t idx) +{ + uint8_t* dst = ref->fData; + uint8_t* src = group->GetVertBufferData(idx); + + size_t uvChanSize = plGBufferGroup::CalcNumUVs(group->GetVertexFormat()) * sizeof(float) * 3; + uint8_t numWeights = (group->GetVertexFormat() & plGBufferGroup::kSkinWeightMask) >> 4; + + for (uint32_t i = 0; i < ref->fCount; ++i) { + memcpy(dst, src, sizeof(hsPoint3)); // pre-pos + dst += sizeof(hsPoint3); + src += sizeof(hsPoint3); + + src += numWeights * sizeof(float); // weights + + if (group->GetVertexFormat() & plGBufferGroup::kSkinIndices) + src += sizeof(uint32_t); // indices + + memcpy(dst, src, sizeof(hsVector3)); // pre-normal + dst += sizeof(hsVector3); + src += sizeof(hsVector3); + + memcpy(dst, src, sizeof(uint32_t) * 2); // diffuse & specular + dst += sizeof(uint32_t) * 2; + src += sizeof(uint32_t) * 2; + + // UVWs + memcpy(dst, src, uvChanSize); + src += uvChanSize; + dst += uvChanSize; + } +} + +void plMetalDevice::SetupIndexBufferRef(plGBufferGroup *owner, uint32_t idx, plMetalDevice::IndexBufferRef *iRef) +{ + uint32_t numIndices = owner->GetIndexBufferCount(idx); + iRef->fCount = numIndices; + iRef->fOwner = owner; + iRef->fIndex = idx; + iRef->fRefTime = 0; + + iRef->SetDirty(true); + iRef->SetRebuiltSinceUsed(true); + + owner->SetIndexBufferRef(idx, iRef); + hsRefCnt_SafeUnRef(iRef); + + iRef->SetVolatile(owner->AreIdxVolatile()); +} + +void plMetalDevice::CheckIndexBuffer(plMetalDevice::IndexBufferRef *iRef) +{ + if(!iRef->GetBuffer() && iRef->fCount) { + iRef->SetVolatile(false); + + 
iRef->SetDirty(true); + iRef->SetRebuiltSinceUsed(true); + } +} + +void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef *iRef, plGBufferGroup *owner, uint32_t idx) +{ + uint32_t startIdx = owner->GetIndexBufferStart(idx); + uint32_t size = (owner->GetIndexBufferEnd(idx) - startIdx) * sizeof(uint16_t); + + if (!size) + { + return; + } + + iRef->PrepareForWrite(); + MTL::Buffer* indexBuffer = iRef->GetBuffer(); + if(!indexBuffer || indexBuffer->length() < size) { + indexBuffer = fMetalDevice->newBuffer(size, MTL::ResourceStorageModeManaged)->autorelease(); + iRef->SetBuffer(indexBuffer); + } + + memcpy(indexBuffer->contents(), owner->GetIndexBufferData(idx), size); + indexBuffer->didModifyRange(NS::Range(0, size)); + + iRef->SetDirty(false); +} + +void plMetalDevice::SetupTextureRef(plLayerInterface *layer, plBitmap *img, plMetalDevice::TextureRef *tRef) +{ + tRef->fOwner = img; + + plBitmap* imageToCheck = img; + + //if it's a cubic texture, check the first face. The root img will give a false format that will cause us to decode wrong. 
+ plCubicEnvironmap* cubicImg = dynamic_cast(img); + if(cubicImg) { + imageToCheck = cubicImg->GetFace(0); + } + + if (imageToCheck->IsCompressed()) { + switch (imageToCheck->fDirectXInfo.fCompressionType) { + case plBitmap::DirectXInfo::kDXT1: + tRef->fFormat = MTL::PixelFormatBC1_RGBA; + break; + case plBitmap::DirectXInfo::kDXT5: + tRef->fFormat = MTL::PixelFormatBC3_RGBA; + break; + } + } else { + switch (imageToCheck->fUncompressedInfo.fType) { + case plBitmap::UncompressedInfo::kRGB8888: + tRef->fFormat = MTL::PixelFormatBGRA8Unorm; + break; + case plBitmap::UncompressedInfo::kRGB4444: + //we'll convert this on load to 8 bits per channel + //Metal doesn't support 4 bits per channel on all hardware + tRef->fFormat = MTL::PixelFormatBGRA8Unorm; + break; + case plBitmap::UncompressedInfo::kRGB1555: + tRef->fFormat = MTL::PixelFormatBGR5A1Unorm; + break; + case plBitmap::UncompressedInfo::kInten8: + tRef->fFormat = MTL::PixelFormatR8Uint; + break; + case plBitmap::UncompressedInfo::kAInten88: + tRef->fFormat = MTL::PixelFormatRG8Uint; + break; + } + } + + tRef->SetDirty(true); + + img->SetDeviceRef(tRef); + hsRefCnt_SafeUnRef(tRef); +} + +void plMetalDevice::CheckTexture(plMetalDevice::TextureRef *tRef) +{ + if (!tRef->fTexture) + { + tRef->SetDirty(true); + } +} + +uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap) +{ + if (mipmap->IsCompressed()) { + mipmap->SetCurrLevel(tRef->fLevels); + while ((mipmap->GetCurrWidth() | mipmap->GetCurrHeight()) & 0x03) { + tRef->fLevels--; + hsAssert(tRef->fLevels >= 0, "How was this ever compressed?" 
); + mipmap->SetCurrLevel(tRef->fLevels); + } + } +} + +void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice) +{ + if (img->IsCompressed()) { + + for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { + img->SetCurrLevel(lvl); + + switch (img->fDirectXInfo.fCompressionType) { + case plBitmap::DirectXInfo::kDXT1: + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 2, 0); + break; + case plBitmap::DirectXInfo::kDXT5: + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); + break; + } + } + } else { + for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { + img->SetCurrLevel(lvl); + + if(img->GetCurrLevelPtr()) { + if(img->fUncompressedInfo.fType == plBitmap::UncompressedInfo::kRGB4444) { + + struct RGBA4444Component { + unsigned r:4; + unsigned g:4; + unsigned b:4; + unsigned a:4; + }; + + RGBA4444Component *in = (RGBA4444Component *)img->GetCurrLevelPtr(); + simd_uint4 *out = (simd_uint4 *) malloc(img->GetCurrHeight() * img->GetCurrWidth() * 4); + + for(int i=0; i<(img->GetCurrWidth() * img->GetCurrHeight()); i++) { + out[i].r = in[i].r; + out[i].g = in[i].g; + out[i].b = in[i].b; + out[i].a = in[i].a; + } + + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, out, img->GetCurrWidth() * 4, 0); + + free(out); + } else { + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); + } + } else { + printf("Texture with no image data?\n"); + } + } + } + tRef->fTexture->setLabel(NS::String::string(img->GetKeyName().c_str(), NS::UTF8StringEncoding)); + tRef->SetDirty(false); +} + +void 
plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef *tRef, plLayerInterface *layer, plMipmap *img) +{ + if (!img->GetImage()) { + return; + } + + tRef->fLevels = img->GetNumLevels() - 1; + if(!tRef->fTexture) { + ConfigureAllowedLevels(tRef, img); + //texture doesn't exist yet, create it + bool supportsMipMap = tRef->fLevels; + MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), img->GetHeight(), supportsMipMap); + descriptor->setUsage(MTL::TextureUsageShaderRead); + //if device has unified memory, set storage mode to shared + if(fMetalDevice->hasUnifiedMemory()) { + descriptor->setStorageMode(MTL::StorageModeShared); + } + descriptor->setUsage(MTL::TextureUsageShaderRead); + //Metal gets mad if we set this with 0, only set it if we know there are mipmaps + if(supportsMipMap) { + descriptor->setMipmapLevelCount(tRef->fLevels + 1); + } + tRef->fTexture = fMetalDevice->newTexture(descriptor); + } + PopulateTexture( tRef, img, 0); +} + +void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plLayerInterface *layer, plCubicEnvironmap *img) +{ + MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::textureCubeDescriptor(tRef->fFormat, img->GetFace(0)->GetWidth(), tRef->fLevels != 0); + + if (tRef->fLevels != 0) { + descriptor->setMipmapLevelCount(tRef->fLevels + 1); + } + descriptor->setUsage(MTL::TextureUsageShaderRead); + //if device has unified memory, set storage mode to shared + if(fMetalDevice->hasUnifiedMemory()) { + descriptor->setStorageMode(MTL::StorageModeShared); + } + + tRef->fTexture = fMetalDevice->newTexture(descriptor); + + static const uint kFaceMapping[] = { + 1, // kLeftFace + 0, // kRightFace + 4, // kFrontFace + 5, // kBackFace + 2, // kTopFace + 3 // kBottomFace + }; + for (size_t i = 0; i < 6; i++) { + PopulateTexture( tRef, img->GetFace(i), kFaceMapping[i]); + } +} + +void plMetalDevice::SetProjectionMatrix(const hsMatrix44& src) +{ + hsMatrix2SIMD(src, 
&fMatrixProj); +} + +void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src) +{ + hsMatrix44 inv; + src.GetInverse(&inv); + + hsMatrix2SIMD(src, &fMatrixW2C); + hsMatrix2SIMD(inv, &fMatrixC2W); +} + +void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder) +{ + hsMatrix44 inv; + src.GetInverse(&inv); + + hsMatrix2SIMD(src, &fMatrixL2W, swapOrder); + hsMatrix2SIMD(inv, &fMatrixW2L, swapOrder); +} + +plMetalDevice::plPipelineStateAtrributes::plPipelineStateAtrributes(const plMetalVertexBufferRef * vRef, const uint32_t blendFlags, const MTL::PixelFormat outputPixelFormat, const MTL::PixelFormat outputDepthFormat, const plShaderID::ID vertexShaderID, const plShaderID::ID fragmentShaderID, const int forShadows, const uint numLayers) +{ + numUVs = plGBufferGroup::CalcNumUVs(vRef->fFormat); + numWeights = (vRef->fFormat & plGBufferGroup::kSkinWeightMask) >> 4; + hasSkinIndices = (vRef->fFormat & plGBufferGroup::kSkinIndices); + outputFormat = outputPixelFormat; + this->depthFormat = outputDepthFormat; + this->blendFlags = blendFlags; + this->vertexShaderID = vertexShaderID; + this->fragmentShaderID = fragmentShaderID; + this->forShadows = forShadows; + this->numLayers = numLayers; +} + + +std::condition_variable * plMetalDevice::prewarmPipelineStateFor(plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, bool forShadows) +{ + plPipelineStateAtrributes attributes = plPipelineStateAtrributes(vRef, blendFlags, fCurrentFragmentOutputTexture->pixelFormat(), fCurrentDepthFormat, vertexShaderID, fragmentShaderID, forShadows, numLayers); + //only render thread is allowed to prewarm, no race conditions around + //fConditionMap creation + if(!fPipelineStateMap[attributes] && fConditionMap[attributes]) { + std::condition_variable *condOut; + StartRenderPipelineBuild(attributes, &condOut); + return condOut; + } + return nullptr; +} + +void 
plMetalDevice::StartRenderPipelineBuild(plPipelineStateAtrributes &attributes, std::condition_variable **condOut) +{ + /* + Shader building requires both knowledge of the vertex buffer layout and the fragment shader details. For now it lives here. The caching and threading mechanism should be factored out so that OpenGL can share them. Vector buffer dependencies should be factored out so we only need material details. That also means we can use the threading to create these earlier in a render pass. + */ + int vertOffset = 0; + int skinWeightOffset = vertOffset + (sizeof(float) * 3); + if(attributes.hasSkinIndices) { + skinWeightOffset += sizeof(uint32_t); + } + int normOffset = skinWeightOffset + (sizeof(float) * attributes.numWeights); + int colorOffset = normOffset + (sizeof(float) * 3); + int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); + int stride = baseUvOffset + (sizeof(float) * 3 * attributes.numUVs); + + MTL::Library *library = fMetalDevice->newDefaultLibrary(); + + MTL::FunctionConstantValues *functionContents = MTL::FunctionConstantValues::alloc()->init(); + functionContents->setConstantValue(&attributes.numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); + functionContents->setConstantValue(&attributes.numLayers, MTL::DataTypeUShort, FunctionConstantNumLayers); + MTL::Function *fragFunction; + MTL::Function *vertFunction; + + if(!attributes.vertexShaderID && !attributes.fragmentShaderID) { + if(attributes.forShadows == 1) { + fragFunction = library->newFunction( + NS::String::string("shadowFragmentShader", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + } else if(attributes.forShadows == 2) { + fragFunction = library->newFunction( + NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + } else { + fragFunction = library->newFunction( + NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + 
} + vertFunction = library->newFunction( + NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + } else if(attributes.vertexShaderID && attributes.fragmentShaderID) { + switch(attributes.vertexShaderID) { + case plShaderID::vs_WaveFixedFin7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveFixedFin7", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::vs_CompCosines: + vertFunction = library->newFunction( + NS::String::string("vs_CompCosines", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::vs_BiasNormals: + vertFunction = library->newFunction( + NS::String::string("vs_BiasNormals", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::vs_GrassShader: + vertFunction = library->newFunction( + NS::String::string("vs_GrassShader", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::vs_WaveDecEnv_7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveDecEnv_7", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + default: + hsAssert(0, "unknown shader requested"); + } + + switch(attributes.fragmentShaderID) { + case plShaderID::ps_WaveFixed: + fragFunction = library->newFunction( + NS::String::string("ps_WaveFixed", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::ps_MoreCosines: + fragFunction = library->newFunction( + NS::String::string("ps_CompCosines", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::ps_BiasNormals: + fragFunction = library->newFunction( + NS::String::string("ps_BiasNormals", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::ps_GrassShader: + fragFunction = library->newFunction( + 
NS::String::string("ps_GrassShader", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + case plShaderID::ps_WaveDecEnv: + fragFunction = library->newFunction( + NS::String::string("ps_WaveDecEnv", NS::ASCIIStringEncoding), + functionContents, + (NS::Error **)NULL + ); + break; + default: + hsAssert(0, "unknown shader requested"); + } + } else { + hsAssert(0, "Pipeline only supports both fragment and vertex shaders together"); + } + + MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); + + vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributePosition)->setOffset(vertOffset); + + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); + + for(int i=0; iattributes()->object(VertexAttributeTexcoord+i)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setOffset(baseUvOffset + (i * sizeof(float) * 3)); + } + + vertexDescriptor->attributes()->object(VertexAttributeColor)->setFormat(MTL::VertexFormatUChar4); + vertexDescriptor->attributes()->object(VertexAttributeColor)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeColor)->setOffset(colorOffset); + + vertexDescriptor->layouts()->object(VertexAttributePosition)->setStride(stride); + + MTL::RenderPipelineDescriptor *descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + + descriptor->setDepthAttachmentPixelFormat(attributes.depthFormat); + + + 
descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); + // No color, just writing out Z values. + if(attributes.forShadows == 1) { + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); + } else if(attributes.forShadows == 2) { + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); + } else if (attributes.blendFlags & hsGMatState::kBlendNoColor) { + //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } else { + switch (attributes.blendFlags & hsGMatState::kBlendMask) + { + // Detail is just a special case of alpha, handled in construction of the texture + // mip chain by making higher levels of the chain more transparent. 
+ case hsGMatState::kBlendDetail: + case hsGMatState::kBlendAlpha: + if (attributes.blendFlags & hsGMatState::kBlendInvertFinalAlpha) { + if (attributes.blendFlags & hsGMatState::kBlendAlphaPremultiplied) { + //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + } else { + //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + } + } else { + if (attributes.blendFlags & hsGMatState::kBlendAlphaPremultiplied) { + //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + } else { + //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + } + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + } + break; + + // Multiply the final color onto the frame buffer. 
+ case hsGMatState::kBlendMult: + if (attributes.blendFlags & hsGMatState::kBlendInvertFinalColor) { + //printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceColor); + } else { + //printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorSourceColor); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceColor); + } + break; + + // Add final color to FB. + case hsGMatState::kBlendAdd: + //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + break; + + // Multiply final color by FB color and add it into the FB. 
+ case hsGMatState::kBlendMADD: + //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorDestinationColor); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + break; + + // Final color times final alpha, added into the FB. + case hsGMatState::kBlendAddColorTimesAlpha: + if (attributes.blendFlags & hsGMatState::kBlendInvertFinalAlpha) { + //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } else { + //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } + break; + + // Overwrite final color onto FB + case 0: + //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + descriptor->colorAttachments()->object(0)->setRgbBlendOperation(MTL::BlendOperationAdd); + //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + 
descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); + + /*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero);*/ + break; + + default: + { + /*hsAssert(false, "Too many blend modes specified in material"); + plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); + if( lay ) + { + if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha ) + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha); + } + else + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); + } + }*/ + } + break; + } + } + + descriptor->colorAttachments()->object(0)->setPixelFormat(attributes.outputFormat); + + descriptor->setFragmentFunction(fragFunction); + descriptor->setVertexFunction(vertFunction); + descriptor->setVertexDescriptor(vertexDescriptor); + std::string label = "Render Pipeline: " + std::to_string(attributes.numUVs) + "UVs, " + std::to_string(attributes.numWeights) + " skin weight"; + descriptor->setLabel(NS::String::string(label.c_str(), NS::UTF8StringEncoding)); + + functionContents->release(); + + __block std::condition_variable *newCondition = new std::condition_variable(); + fConditionMap[attributes] = newCondition; + if(condOut) { + *condOut = newCondition; + } + + fMetalDevice->newRenderPipelineState(descriptor, ^(MTL::RenderPipelineState *pipelineState, NS::Error *error){ + if(error) { + 
hsAssert(0, error->localizedDescription()->cString(NS::UTF8StringEncoding));
+            //leave the condition in place for now, we don't want to
+            //retry if the shader is defective. the condition will
+            //prevent retries
+        } else {
+            //update the pipeline state, if it's null just set null
+            pipelineState->retain();
+            
+            plMetalLinkedPipeline *linkedPipeline = new plMetalLinkedPipeline();
+            linkedPipeline->pipelineState = pipelineState;
+            linkedPipeline->fragFunction = fragFunction;
+            linkedPipeline->vertexFunction = vertFunction;
+            
+            fPipelineStateMap[attributes] = linkedPipeline;
+        }
+        //signal that we're done
+        //NOTE(review): the notification is sent without holding
+        //fPipelineCreationMtx and no "finished" flag is recorded, so a waiter
+        //that has not reached wait() yet never sees it.
+        newCondition->notify_all();
+    });
+    descriptor->release();
+    library->release();
+}
+
+//Returns the linked pipeline state for the given vertex buffer layout, blend
+//flags, layer count, shader IDs and shadow mode, combined with the pixel format
+//of the current fragment output texture and the current depth format.
+//
+//A cached state is returned immediately. Otherwise the calling thread blocks
+//on the condition variable of a build that is already registered in
+//fConditionMap, or starts a new build and blocks on that one. The result is
+//whatever fPipelineStateMap holds afterwards, which is null if the build failed.
+//
+//NOTE(review): both wait() calls have no predicate. std::condition_variable
+//does not remember earlier notifications, so if the completion handler calls
+//notify_all() before this thread enters wait() the call blocks indefinitely,
+//and a spurious wakeup returns early with a possibly null state.
+//NOTE(review): after a failed build nothing is stored in fPipelineStateMap and
+//the condition stays in fConditionMap, so the next request for the same
+//attributes takes the "already building" branch and waits for a notification
+//that was already delivered.
+//NOTE(review): fPipelineStateMap is written by the completion handler and read
+//here without a common lock - confirm which thread the handler runs on.
+plMetalDevice::plMetalLinkedPipeline* plMetalDevice::pipelineStateFor(const plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, int forShadows)
+{
+    plPipelineStateAtrributes attributes = plPipelineStateAtrributes(vRef, blendFlags, fCurrentFragmentOutputTexture->pixelFormat(), fCurrentDepthFormat, vertexShaderID, fragmentShaderID, forShadows, numLayers);
+    //operator[] inserts a null entry when the key has not been seen before
+    plMetalLinkedPipeline* renderState = fPipelineStateMap[attributes];
+    
+    //if it exists, return it, we're done
+    if(renderState) {
+        return renderState;
+    }
+    
+    //check and see if we're already building it. If so, wait.
+    //Note: even if it already exists, this lock will be kept, and it will
+    //let us through. This is to prevent race conditions where the render state
+    //was null, but maybe in the time it took us to get here the state compiled.
+    //NOTE(review): the note above does not match std::condition_variable
+    //semantics; a build that finished before this point does not let the
+    //wait below through.
+    std::condition_variable *alreadyBuildingCondition = fConditionMap[attributes];
+    if(alreadyBuildingCondition) {
+        std::unique_lock lock(fPipelineCreationMtx);
+        alreadyBuildingCondition->wait(lock);
+        
+        //should be returning the render state here, if not it failed to build
+        //we'll allow the null return
+        return fPipelineStateMap[attributes];
+    }
+    
+    //it doesn't exist, start a build and wait
+    //only render thread is allowed to start builds,
+    //shouldn't be race conditions here
+    StartRenderPipelineBuild(attributes, &alreadyBuildingCondition);
+    std::unique_lock lock(fPipelineCreationMtx);
+    alreadyBuildingCondition->wait(lock);
+    
+    //should be returning the render state here, if not it failed to build
+    //we'll allow the null return
+    return fPipelineStateMap[attributes];
+}
+
+void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable)
+{
+    fCurrentCommandBuffer = fCommandQueue->commandBuffer();
+    fCurrentCommandBuffer->retain();
+    
+    //cache the depth buffer, we'll just clear it every time.
+ if(fCurrentDrawableDepthTexture == nullptr || + drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || + drawable->texture()->height() != fCurrentDrawableDepthTexture->height() + ) { + if(fCurrentDrawableDepthTexture) { + fCurrentDrawableDepthTexture->release(); + } + + MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + drawable->texture()->width(), + drawable->texture()->height(), + false); + if(fMetalDevice->hasUnifiedMemory()) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + + fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); + } + + fCurrentDrawable = drawable->retain(); +} + +MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() +{ + return fCurrentCommandBuffer; +} + +void plMetalDevice::SubmitCommandBuffer() +{ + fDrawableRenderCommandEncoder->endEncoding(); + fDrawableRenderCommandEncoder->release(); + fDrawableRenderCommandEncoder = nil; + + fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); + fCurrentCommandBuffer->enqueue(); + fCurrentCommandBuffer->commit(); + //as we more tightly manage resource sync we may be able to avoid waiting for the frame to complete + //fCurrentCommandBuffer->waitUntilCompleted(); + fCurrentCommandBuffer->release(); + fCurrentCommandBuffer = nil; + + fCurrentDrawable->release(); + fCurrentDrawable = nil; +} + +MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() +{ + //return the current render command encoder + //if a framebuffer wasn't set, assume screen, emulating GL + if(fCurrentRenderTargetCommandEncoder) { + return fCurrentRenderTargetCommandEncoder; + } + + //lazilly create the screen render encoder if it does not yet exist + if(!fDrawableRenderCommandEncoder) { + SetRenderTarget(NULL); 
+ } + return fDrawableRenderCommandEncoder; +} + +CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() +{ + return fCurrentDrawable; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h new file mode 100644 index 0000000000..ccf30eaa22 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -0,0 +1,246 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. 
Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalDevice_h_ +#define _plMetalDevice_h_ + +#include "HeadSpin.h" + +#include "plMetalDeviceRef.h" +#include "hsMatrix44.h" + +#include +#include +#include + +#include +#include + +#include "plSurface/plShader.h" +#include "plSurface/plShaderTable.h" + +class plMetalPipeline; +class plRenderTarget; +class plBitmap; +class plMipmap; +class plCubicEnvironmap; +class plLayerInterface; + +matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder = true); + +class plMetalDevice +{ + + friend plMetalPipeline; + friend class plMetalMaterialShaderRef; + friend class plMetalPlateManager; + +public: + typedef plMetalVertexBufferRef VertexBufferRef; + typedef plMetalIndexBufferRef IndexBufferRef; + typedef plMetalTextureRef TextureRef; + +public: + plMetalPipeline* fPipeline; + + hsWindowHndl fDevice; + hsWindowHndl fWindow; + + const char* fErrorMsg; + + MTL::RenderCommandEncoder* CurrentRenderCommandEncoder(); + MTL::Device* fMetalDevice; + MTL::CommandQueue* fCommandQueue; + MTL::Buffer* fCurrentIndexBuffer; + + size_t fActiveThread; + matrix_float4x4 fMatrixProj; + matrix_float4x4 fMatrixL2W; + matrix_float4x4 fMatrixW2L; + matrix_float4x4 fMatrixW2C; + matrix_float4x4 fMatrixC2W; + +public: + + struct plMetalLinkedPipeline { + MTL::RenderPipelineState *pipelineState; + MTL::Function *fragFunction; + MTL::Function *vertexFunction; + }; + + plMetalDevice(); + + bool InitDevice(); + + void Shutdown(); + + /** + * Set rendering to the specified render target. + * + * Null rendertarget is the primary. Invalidates the state as required by + * experience, not documentation. 
+ */ + void SetRenderTarget(plRenderTarget* target); + + /** Translate our viewport into a GL viewport. */ + void SetViewport(); + + + bool BeginRender(); + + /* Device Ref Functions **************************************************/ + void SetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, VertexBufferRef* vRef); + void CheckStaticVertexBuffer(VertexBufferRef* vRef, plGBufferGroup* owner, uint32_t idx); + void FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx); + void FillVolatileVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx); + void SetupIndexBufferRef(plGBufferGroup* owner, uint32_t idx, IndexBufferRef* iRef); + void CheckIndexBuffer(IndexBufferRef* iRef); + void FillIndexBufferRef(IndexBufferRef* iRef, plGBufferGroup* owner, uint32_t idx); + + void SetupTextureRef(plLayerInterface* layer, plBitmap* img, TextureRef* tRef); + void CheckTexture(TextureRef* tRef); + void MakeTextureRef(TextureRef* tRef, plLayerInterface* layer, plMipmap* img); + void MakeCubicTextureRef(TextureRef* tRef, plLayerInterface* layer, plCubicEnvironmap* img); + + + const char* GetErrorString() const { return fErrorMsg; } + + void SetProjectionMatrix(const hsMatrix44& src); + void SetWorldToCameraMatrix(const hsMatrix44& src); + void SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder = true); + + void SetClearColor(simd_float4 clearColor) { fClearColor = clearColor; }; + + void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice); + uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap); + std::condition_variable * prewarmPipelineStateFor(plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, bool forShadows = false); + ///Returns the proper pipeline state for the given vertex and fragment buffers, and the current drawable. These states should not be reused between drawables. 
+ plMetalLinkedPipeline* pipelineStateFor(const plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, int forShadows = 0); + + //stencil states are expensive to make, they should be cached + //FIXME: There should be a function to pair these with hsGMatState + MTL::DepthStencilState *fNoZReadStencilState; + MTL::DepthStencilState *fNoZWriteStencilState; + MTL::DepthStencilState *fNoZReadOrWriteStencilState; + MTL::DepthStencilState *fReverseZStencilState; + MTL::DepthStencilState *fDefaultStencilState; + + ///Create a new command buffer to encode all the operations needed to draw a frame + //Currently requires a CA drawable and not a Metal drawable. In since CA drawable is only abstract implementation I know about, not sure where we would find others? + void CreateNewCommandBuffer(CA::MetalDrawable* drawable); + MTL::CommandBuffer* GetCurrentCommandBuffer(); + CA::MetalDrawable* GetCurrentDrawable(); + ///Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. + void SubmitCommandBuffer(); + ///Render encoder to submit draw commands to. This state will automatically reflect the displayable or the current render target, depending on which target has been assigned by Plasma. Will be null if there is no current command buffer. + /// +private: + + //internal struct for tracking which Metal state goes with which set of + //fragment/vertex pass attributes. This allows for shader program reuse. 
+ //Hashable so we can use a std::unordered_map for storage + struct plPipelineStateAtrributes { + uint numUVs; + uint numLayers; + uint numWeights; + bool hasSkinIndices; + plShaderID::ID vertexShaderID; + plShaderID::ID fragmentShaderID; + //the specific blend mode flag, not the entire set of flags from a material + //these are defined as mutually exclusive anyway + //0 implies no blend flag set + uint32_t blendFlags; + MTL::PixelFormat outputFormat; + MTL::PixelFormat depthFormat; + int forShadows; + + bool operator==(const plPipelineStateAtrributes &p) const { + return numUVs == p.numUVs && numWeights == p.numWeights && blendFlags == p.blendFlags && hasSkinIndices == p.hasSkinIndices && outputFormat == p.outputFormat && vertexShaderID == p.vertexShaderID && fragmentShaderID == p.fragmentShaderID && depthFormat == p.depthFormat && forShadows == p.forShadows && numUVs == p.numUVs && numLayers == p.numLayers; + } + + plPipelineStateAtrributes(const plPipelineStateAtrributes &attributes) { + memcpy(this, &attributes, sizeof(plPipelineStateAtrributes)); + } + + plPipelineStateAtrributes(const plMetalVertexBufferRef * vRef, const uint32_t blendFlags, const MTL::PixelFormat outputPixelFormat, const MTL::PixelFormat outputDepthFormat, const plShaderID::ID vertexShaderID, const plShaderID::ID fragmentShaderID, const int forShadows, const uint numLayers); + }; + + struct plPipelineStateAtrributesHashFunction + { + std::size_t operator() (plPipelineStateAtrributes const & key) const + { + std::size_t h1 = std::hash()(key.numUVs); + std::size_t h2 = std::hash()(key.numWeights); + std::size_t h3 = std::hash()(key.blendFlags); + std::size_t h4 = std::hash()(key.hasSkinIndices); + std::size_t h5 = std::hash()(key.outputFormat); + std::size_t h6 = std::hash()(key.vertexShaderID); + std::size_t h7 = std::hash()(key.fragmentShaderID); + std::size_t h8 = std::hash()(key.depthFormat); + std::size_t h9 = std::hash()(key.forShadows); + std::size_t h10 = std::hash()(key.numLayers); + 
+ return h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^ h7 ^ h8 ^ h9 ^ h10; + } + }; + + std::unordered_map fPipelineStateMap; + //the condition map allows consumers of pipeline states to wait until the pipeline state is ready + std::unordered_map fConditionMap; + void StartRenderPipelineBuild(plPipelineStateAtrributes &attributes, std::condition_variable **condOut); + std::mutex fPipelineCreationMtx; + +private: + //these are internal bits for backing the current render pass + //private because the functions should be used to keep a consistant + //render pass state + MTL::CommandBuffer* fCurrentCommandBuffer; + MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; + MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; + MTL::RenderCommandEncoder* fDrawableRenderCommandEncoder; + MTL::Texture* fCurrentDrawableDepthTexture; + MTL::Texture* fCurrentFragmentOutputTexture; + CA::MetalDrawable* fCurrentDrawable; + MTL::PixelFormat fCurrentDepthFormat; + simd_float4 fClearColor; +}; + +#endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp new file mode 100644 index 0000000000..772b965ee3 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp @@ -0,0 +1,83 @@ +// +// plMetalDeviceRef.cpp +// CoreLib +// +// Created by Colin Cornaby on 11/10/21. 
+// + +#include "plMetalDeviceRef.h" + + + +void plMetalDeviceRef::Unlink() { + hsAssert(fBack, "plGLDeviceRef not in list"); + + if (fNext) + fNext->fBack = fBack; + *fBack = fNext; + + fBack = nullptr; + fNext = nullptr; + +} + +void plMetalDeviceRef::Link(plMetalDeviceRef **back) { + hsAssert(fNext == nullptr && fBack == nullptr, "Trying to link a plMetalDeviceRef that's already linked"); + + fNext = *back; + if (*back) + (*back)->fBack = &fNext; + fBack = back; + *back = this; +} + +plMetalDeviceRef::~plMetalDeviceRef() +{ + if (fNext != nullptr || fBack != nullptr) + Unlink(); +} + +plMetalVertexBufferRef::~plMetalVertexBufferRef() +{ + Release(); +} + + +void plMetalVertexBufferRef::Release() +{ + SetDirty(true); +} + +plMetalTextureRef::~plMetalTextureRef() +{ + //fTexture->release(); + Release(); +} + + +void plMetalTextureRef::Release() +{ + SetDirty(true); +} + + +plMetalIndexBufferRef::~plMetalIndexBufferRef() +{ + Release(); +} + + +void plMetalIndexBufferRef::Release() +{ + SetDirty(true); +} + + +plMetalRenderTargetRef::~plMetalRenderTargetRef() { + Release(); +} + +void plMetalRenderTargetRef::Release() +{ + SetDirty(true); +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h new file mode 100644 index 0000000000..78825c3d63 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -0,0 +1,293 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalDeviceRef_inc_ +#define _plMetalDeviceRef_inc_ + +#include "HeadSpin.h" +#include "hsGDeviceRef.h" +#include +#include + +class plGBufferGroup; +class plBitmap; +class plRenderTarget; + + +class plMetalDeviceRef : public hsGDeviceRef +{ +protected: + plMetalDeviceRef* fNext; + plMetalDeviceRef** fBack; + +public: + void Unlink(); + void Link(plMetalDeviceRef **back); + plMetalDeviceRef* GetNext() { return fNext; } + bool IsLinked() { return fBack != nullptr; } + + bool HasFlag(uint32_t f) const { return 0 != (fFlags & f); } + void SetFlag(uint32_t f, bool on) { if(on) fFlags |= f; else fFlags &= ~f; } + + virtual void Release() = 0; + + plMetalDeviceRef(); + virtual ~plMetalDeviceRef(); +}; + +/* + The buffer pool stores and recycles buffers so that Plasma can encode GPU commands and render in parallel. That means we can't touch buffers the GPU is using, and if a pass or frame rewrites a buffer we have to make sure it's not stomping on something that is already attached to a frame. Because Metal can triple buffer, the first dimension of caching is hard coded to 3. Some ages will also rewrite buffers an unspecified number of times between render passes. For example: A reflection render and a main render might have different index buffers. So the second dimension of caching uses an unbounded vector that will hold enough buffers to render in any one age. + + Buffer pools do not allocate buffers, they only store them. The outside caller is responsible for allocating a buffer and then setting it. The buffer pool will retain any buffers within the pool, and automatically release them when they are overwritten or the pool is deallocated. + + Because buffers are only stored on write, and no allocations happen within the pool, overhead is kept low for static buffers. Completely static buffers will never expand the pool if they only write once. 
+ */
+class plMetalBufferPoolRef : public plMetalDeviceRef {
+public:
+    //Index of the frame slot currently being written (0 .. kNumFrameSlots-1).
+    uint32_t fCurrentFrame;
+    //Index of the write pass inside the current frame slot.
+    uint32_t fCurrentPass;
+    //Value of fFrameTime when PrepareForWrite() last moved to a new frame slot.
+    uint32_t fLastWriteFrameTime;
+
+    //Members are initialised in declaration order.
+    plMetalBufferPoolRef() :
+        plMetalDeviceRef(),
+        fCurrentFrame(0),
+        fCurrentPass(0),
+        fLastWriteFrameTime(0),
+        fBuffer(nullptr)
+    {
+    }
+
+    //Every buffer passed to SetBuffer() was retained by the pool, so hand those
+    //references back when the pool goes away. The call is qualified so that the
+    //pool's own Release() runs here.
+    virtual ~plMetalBufferPoolRef()
+    {
+        plMetalBufferPoolRef::Release();
+    }
+
+    //Prepare for write must be called anytime a new pass is going to write a buffer. It moves internal record keeping to reflect that either a new frame or new pass is about to write to the pool.
+    void PrepareForWrite() {
+        //if we've moved frames since the last time a write happened, reset our current pass index to 0, otherwise increment the current pass
+        if(fLastWriteFrameTime != fFrameTime) {
+            fCurrentPass = 0;
+            fLastWriteFrameTime = fFrameTime;
+            fCurrentFrame = (fCurrentFrame + 1) % kNumFrameSlots;
+        } else {
+            fCurrentPass++;
+        }
+
+        //update the current buffer focused, if there is no buffer to focus set it to null
+        const std::vector<MTL::Buffer*>& frameBuffers = fBuffers[fCurrentFrame];
+        if(fCurrentPass < frameBuffers.size()) {
+            fBuffer = frameBuffers[fCurrentPass];
+        } else {
+            fBuffer = nullptr;
+        }
+    }
+
+    //Sets the frame time that PrepareForWrite() compares against to detect a new frame.
+    static void SetFrameTime(uint32_t frameTime) { fFrameTime = frameTime; }
+
+    //Returns the buffer stored for the current frame slot and pass, or null if none is stored.
+    MTL::Buffer* GetBuffer() { return fBuffer; }
+
+    //Stores buffer for the current frame slot and pass. The pool takes its own
+    //reference; a buffer previously stored in the same position is released.
+    void SetBuffer(MTL::Buffer* buffer) {
+        //retain before releasing the old entry so storing the same buffer twice is safe
+        fBuffer = buffer->retain();
+        std::vector<MTL::Buffer*>& frameBuffers = fBuffers[fCurrentFrame];
+        if(fCurrentPass >= frameBuffers.size()) {
+            //grow far enough that fCurrentPass is a valid index, even when
+            //earlier passes never stored a buffer; new positions are null
+            frameBuffers.resize(fCurrentPass + 1, nullptr);
+        } else if(frameBuffers[fCurrentPass]) {
+            //if we're replacing an existing entry, release the old one
+            frameBuffers[fCurrentPass]->release();
+        }
+        frameBuffers[fCurrentPass] = fBuffer;
+    }
+
+    //Releases every buffer held by the pool and empties it. Clearing the
+    //vectors means a second call, or a later PrepareForWrite(), never touches
+    //a buffer that was already released.
+    void Release() {
+        for(std::vector<MTL::Buffer*>& frameBuffers : fBuffers) {
+            for(MTL::Buffer* buffer : frameBuffers) {
+                if(buffer) {
+                    buffer->release();
+                }
+            }
+            frameBuffers.clear();
+        }
+        fBuffer = nullptr;
+    }
+
+private:
+    //First caching dimension, see the comment above the class.
+    enum { kNumFrameSlots = 3 };
+
+    static uint32_t fFrameTime;
+    MTL::Buffer* fBuffer;
+    std::vector<MTL::Buffer*> fBuffers[kNumFrameSlots];
+};
+
+
+class 
plMetalVertexBufferRef : public plMetalBufferPoolRef +{ +public: + plGBufferGroup* fOwner; + uint32_t fCount; + uint32_t fIndex; + uint32_t fVertexSize; + int32_t fOffset; + uint8_t fFormat; + uint8_t* fData; + + uint32_t fRefTime; + + enum { + kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef + kVolatile = 0x20, + kSkinned = 0x40 + }; + + bool RebuiltSinceUsed() const { return HasFlag(kRebuiltSinceUsed); } + void SetRebuiltSinceUsed(bool b) { SetFlag(kRebuiltSinceUsed, b); } + + bool Volatile() const { return HasFlag(kVolatile); } + void SetVolatile(bool b) { SetFlag(kVolatile, b); } + + bool Skinned() const { return HasFlag(kSkinned); } + void SetSkinned(bool b) { SetFlag(kSkinned, b); } + + bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); } + void SetRefTime(uint32_t t) { fRefTime = t; } + + plMetalVertexBufferRef() : + plMetalBufferPoolRef(), + fCount(0), + fIndex(0), + fVertexSize(0), + fOffset(0), + fOwner(nullptr), + fData(nullptr), + fFormat(0), + fRefTime(0) + { + } + + virtual ~plMetalVertexBufferRef(); + + + void Link(plMetalVertexBufferRef** back ) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalVertexBufferRef* GetNext() { return (plMetalVertexBufferRef*)fNext; } + + void Release(); +}; + + +class plMetalIndexBufferRef : public plMetalBufferPoolRef +{ +public: + uint32_t fCount; + uint32_t fIndex; + plGBufferGroup* fOwner; + uint32_t fRefTime; + uint32_t fLastWriteFrameTime; + + enum { + kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef + kVolatile = 0x20 + }; + + bool RebuiltSinceUsed() const { return HasFlag(kRebuiltSinceUsed); } + void SetRebuiltSinceUsed(bool b) { SetFlag(kRebuiltSinceUsed, b); } + + bool Volatile() const { return HasFlag(kVolatile); } + void SetVolatile(bool b) { SetFlag(kVolatile, b); } + + bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); } + void SetRefTime(uint32_t t) { fRefTime = t; } + + void Release(); + + void 
Link(plMetalIndexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalIndexBufferRef* GetNext() { return (plMetalIndexBufferRef*)fNext; } + virtual ~plMetalIndexBufferRef(); + + plMetalIndexBufferRef(): + plMetalBufferPoolRef(), + fCount(0), + fIndex(0), + fRefTime(0), + fLastWriteFrameTime(0), + fOwner(nullptr) { + } +}; + + +class plMetalTextureRef : public plMetalDeviceRef +{ +public: + plBitmap* fOwner; + + uint32_t fLevels; + MTL::Texture* fTexture; + MTL::PixelFormat fFormat; + + void Link(plMetalTextureRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalTextureRef* GetNext() { return (plMetalTextureRef*)fNext; } + + plMetalTextureRef() : + plMetalDeviceRef(), + fOwner(nullptr), + fTexture(nullptr), + fLevels(1) + { + } + + virtual ~plMetalTextureRef(); + + void Release(); +}; + + + +class plMetalRenderTargetRef: public plMetalTextureRef +{ +public: + MTL::Texture* fDepthBuffer; + + void Link(plMetalRenderTargetRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalRenderTargetRef* GetNext() { return (plMetalRenderTargetRef*)fNext; } + + plMetalRenderTargetRef() : fDepthBuffer(nullptr) + { + } + + virtual ~plMetalRenderTargetRef(); + + void Release(); + + virtual void SetOwner(plRenderTarget* targ) { fOwner = (plBitmap*)targ; } +}; + + +#endif // _plGLDeviceRef_inc_ + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp new file mode 100644 index 0000000000..5c94dc2c0a --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp @@ -0,0 +1,164 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. 
+ +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "plPipeline/hsWinRef.h" + +#include "plMetalPipeline.h" +#include "plMetalDeviceRef.h" + +#include "plProfile.h" +#include "plStatusLog/plStatusLog.h" + +plProfile_CreateMemCounter("Vertices", "Memory", MemVertex); +plProfile_CreateMemCounter("Indices", "Memory", MemIndex); +plProfile_CreateMemCounter("Textures", "Memory", MemTexture); + + +/***************************************************************************** + ** Generic plGLDeviceRef Functions ** + *****************************************************************************/ +plMetalDeviceRef::plMetalDeviceRef() +: fNext(nullptr), + fBack(nullptr) +{ +} + +plMetalDeviceRef::~plMetalDeviceRef() +{ + if (fNext != nullptr || fBack != nullptr) + Unlink(); +} + +void plMetalDeviceRef::Unlink() { + hsAssert(fBack, "plGLDeviceRef not in list"); + + if (fNext) + fNext->fBack = fBack; + *fBack = fNext; + + fBack = nullptr; + fNext = nullptr; + +} + +uint32_t plMetalBufferPoolRef::fFrameTime(0); + +void plMetalDeviceRef::Link(plMetalDeviceRef **back) { + hsAssert(fNext == nullptr && fBack == nullptr, "Trying to link a plMetalDeviceRef that's already linked"); + + fNext = *back; + if (*back) + (*back)->fBack = &fNext; + fBack = back; + *back = this; +} + + +/***************************************************************************** + ** Vertex buffer cleanup Functions ** + *****************************************************************************/ + +plMetalVertexBufferRef::~plMetalVertexBufferRef() +{ + Release(); +} + + +void plMetalVertexBufferRef::Release() +{ + SetDirty(true); +} + + +/***************************************************************************** + ** Index buffer cleanup Functions ** + *****************************************************************************/ + +plMetalIndexBufferRef::~plMetalIndexBufferRef() +{ + Release(); +} + +void plMetalIndexBufferRef::Release() +{ + SetDirty(true); +} + + 
+/***************************************************************************** + ** Texture cleanup Functions ** + *****************************************************************************/ + +void plMetalTextureRef::Release() +{ + if (fTexture) { + fTexture->release(); + fTexture = nullptr; + } + SetDirty(true); +} + +plMetalTextureRef::~plMetalTextureRef() +{ + Release(); + + if (fNext != nullptr || fBack != nullptr) + Unlink(); +} + + +/***************************************************************************** + ** FrameBuffer cleanup Functions ** + *****************************************************************************/ + +plMetalRenderTargetRef::~plMetalRenderTargetRef() { + Release(); +} + +void plMetalRenderTargetRef::Release() +{ + if(fDepthBuffer) { + fDepthBuffer->release(); + fDepthBuffer = nullptr; + } + plMetalTextureRef::Release(); + SetDirty(true); +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp new file mode 100644 index 0000000000..e411fb6831 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp @@ -0,0 +1,81 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "HeadSpin.h" +#include "hsWindows.h" + +#include + +#include "plMetalFragmentShader.h" + +#include "plSurface/plShader.h" + +#include "plDrawable/plGBufferGroup.h" +#include "plMetalPipeline.h" + +plMetalFragmentShader::plMetalFragmentShader(plShader* owner) +: plMetalShader(owner) +{ +} + +plMetalFragmentShader::~plMetalFragmentShader() +{ + Release(); +} + +void plMetalFragmentShader::Release() +{ + fPipe = nil; + + //ISetError(nil); +} + +bool plMetalFragmentShader::ISetConstants(plMetalPipeline* pipe) +{ + if( fOwner->GetNumConsts() ) + { + float *ptr = (float *)fOwner->GetConstBasePtr(); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, BufferIndexUniforms); + } + + return true; +} + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h new file mode 100644 
index 0000000000..e9f3fd3f97 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h @@ -0,0 +1,66 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+      14617 N Newport Hwy
+      Mead, WA 99021
+
+*==LICENSE==*/
+
+#ifndef plMetalFragmentShader_inc
+#define plMetalFragmentShader_inc
+
+#include "plMetalShader.h"
+
+class plShader;
+class plMetalPipeline;
+// Fragment-stage shader ref: a plMetalShader that uploads its owner's constants.
+class plMetalFragmentShader : public plMetalShader
+{
+protected:
+
+
+public:
+    virtual bool ISetConstants(plMetalPipeline* pipe); // Uploads the owner's constants; the current implementation always returns true.
+    plMetalFragmentShader(plShader* owner);
+    virtual ~plMetalFragmentShader();
+    // Link()/GetNext() are typed wrappers over the plMetalDeviceRef list members.
+    virtual void Release();
+    void Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); }
+    plMetalFragmentShader* GetNext() { return (plMetalFragmentShader*)fNext; }
+};
+
+#endif // plMetalFragmentShader_inc
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
new file mode 100644
index 0000000000..a024263d41
--- /dev/null
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp
@@ -0,0 +1,586 @@
+/*==LICENSE==*
+
+CyanWorlds.com Engine - MMOG client, server and tools
+Copyright (C) 2011 Cyan Worlds, Inc.
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +#include "plMetalMaterialShaderRef.h" + +#include "HeadSpin.h" +#include "hsBitVector.h" + +#include "plDrawable/plGBufferGroup.h" +#include "plGImage/plMipmap.h" +#include "plGImage/plCubicEnvironmap.h" +#include "plPipeline.h" +#include "plPipeDebugFlags.h" +#include "plPipeline/plCubicRenderTarget.h" +#include "plPipeline/plRenderTarget.h" +#include "plSurface/hsGMaterial.h" +#include "plSurface/plLayerInterface.h" + +#include "hsGMatState.inl" + +#include "plMetalDevice.h" +#include "plMetalPipeline.h" + +plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline *pipe) : +fPipeline { pipe }, +fMaterial { mat }, +fFragFunction(), +fNumPasses(0) +{ + fDevice = pipe->fDevice.fMetalDevice; + fFragFunction = pipe->fFragFunction; + CheckMateralRef(); +} + +plMetalMaterialShaderRef::~plMetalMaterialShaderRef() +{ + Release(); +} + +void plMetalMaterialShaderRef::Release() +{ + for(auto & buffer : 
fPassArgumentBuffers) { + buffer->release(); + buffer = nil; + } + fPassArgumentBuffers.clear(); + + for(auto & buffer : fPassColors) { + buffer->release(); + buffer = nil; + } + fPassColors.clear(); + + fNumPasses = 0; +} + +void plMetalMaterialShaderRef::CheckMateralRef() +{ + if(fNumPasses == 0) { + ILoopOverLayers(); + + for (size_t i = 0; i < fMaterial->GetNumLayers(); i++) { + plLayerInterface* layer = fMaterial->GetLayer(i); + if (!layer) { + continue; + } + + fPipeline->CheckTextureRef(layer); + } + } +} + +//fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on the GPU so it's much faster. Use this encoder if there are no piggybacks or pushover layers +void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass) +{ + size_t i = 0; + for (i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { + plLayerInterface* layer = fMaterial->GetLayer(i); + + if (!layer) { + continue; + } + + fPipeline->CheckTextureRef(layer); + + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + + if (!img) { + continue; + } + + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); + + if (!texRef->fTexture) { + continue; + } + + assert(i - GetPassIndex(pass) >= 0); + EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); + IBuildLayerTexture(encoder, i, layer, nullptr); + } + + encoder->setFragmentBytes(fPassColors[pass], sizeof(simd_float4) * 8, FragmentShaderArgumentAttributeColors); + encoder->setFragmentBytes(fPassArgumentBuffers[pass], sizeof(plMetalFragmentShaderArgumentBuffer), BufferIndexFragArgBuffer); +} + +void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) +{ + //encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, 
BufferIndexFragArgBuffer); + + vertexUniforms->numUVSrcs = fPassLengths[pass]; + if(piggyBacks) { + vertexUniforms->numUVSrcs += piggyBacks->size(); + } + + simd_float4 colorMap[8]; + plMetalFragmentShaderArgumentBuffer uniforms; + uniforms.layerCount = 0; + + IHandleMaterial(GetPassIndex(pass), &uniforms, piggyBacks, + [&](plLayerInterface* layer, uint32_t index) { + layer = preEncodeTransform(layer, index); + IBuildLayerTexture(encoder, index, layer, colorMap); + return layer; + }, [&](plLayerInterface* layer, uint32_t index) { + layer = postEncodeTransform(layer, index); + return layer; + }); + + size_t i = 0; + for (i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { + plLayerInterface* layer = fMaterial->GetLayer(i); + + if (!layer) { + continue; + } + + fPipeline->CheckTextureRef(layer); + + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + + if (!img) { + continue; + } + + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); + + if (!texRef->fTexture) { + continue; + } + + assert(i - GetPassIndex(pass) >= 0); + EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); + } + + if(piggyBacks) { + for (size_t piggybackIndex = 0; piggybackIndex < piggyBacks->size(); piggybackIndex++) { + // Note that we take piggybacks off the end of piggyBacks. 
+ plLayerInterface* layer = piggyBacks->at(piggyBacks->size() - 1 - piggybackIndex); + + if (!layer) { + continue; + } + + fPipeline->CheckTextureRef(layer); + + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + + if (!img) { + continue; + } + + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); + + if (!texRef->fTexture) { + continue; + } + + assert(i - GetPassIndex(pass) >= 0); + EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass) + piggybackIndex]); + } + } + + encoder->setFragmentBytes(colorMap, sizeof(colorMap), FragmentShaderArgumentAttributeColors); + encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), BufferIndexFragArgBuffer); +} + +void plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform) { + matrix_float4x4 tXfm; + hsMatrix2SIMD(layer->GetTransform(), &tXfm); + transform->transform = tXfm; + transform->flags = layer->GetMiscFlags(); + transform->UVWSrc = layer->GetUVWSrc(); +} + +//This is old - supporting the plate code. 
+//FIXME: Replace the plate codes path to texturing +void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass) +{ + int32_t numTextures = 0; + + plLayerInterface* layer = fMaterial->GetLayer(pass); + if (!layer) { + return; + } + fPipeline->CheckTextureRef(layer); + + // Load the image + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + + if (!img) { + return; + } + + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); + + if (!texRef->fTexture) { + return; + } + + if (plCubicEnvironmap::ConvertNoRef(layer->GetTexture()) != nullptr) { + } else if (plMipmap::ConvertNoRef(layer->GetTexture()) != nullptr || plRenderTarget::ConvertNoRef(layer->GetTexture()) != nullptr) { + encoder->setFragmentTexture(texRef->fTexture, Texture); + } + + numTextures++; +} + +void plMetalMaterialShaderRef::ILoopOverLayers() +{ + size_t j = 0; + size_t pass = 0; + + for (j = 0; j < fMaterial->GetNumLayers(); ) + { + size_t iCurrMat = j; + + //Create "fast encode" buffers + //Fast encode can be used when there are no piggybacks or pushover layers. We'll load as much of the + //base state of this layer as we can onto the GPU. Using fast encode, the renderer can avoid encoding + //a lot of the render state, it will be on the GPU already. + //I'd like to encode more data here, and use a heap. The heap hasn't happened yet because heaps are + //private memory, and we don't have a window yet for a blit phase into private memory. 
+ MTL::Buffer *argumentBuffer = fDevice->newBuffer(sizeof(plMetalFragmentShaderArgumentBuffer), MTL::ResourceStorageModeManaged); + MTL::Buffer *colorBuffer = fDevice->newBuffer(sizeof(simd_float4) * 8, MTL::ResourceStorageModeManaged); + + j = IHandleMaterial(iCurrMat, (plMetalFragmentShaderArgumentBuffer *)argumentBuffer->contents(), nullptr, + [](plLayerInterface* layer, uint32_t index) { + return layer; + }, + [](plLayerInterface* layer, uint32_t index) { + return layer; + }); + + if (j == -1) + break; + + pass++; + + //encode the colors for this pass into our buffer for fast rendering + for(int colorToEncode = 0; colorToEncode < j - iCurrMat; colorToEncode ++) { + IBuildLayerTexture(NULL, colorToEncode, fMaterial->GetLayer(iCurrMat + colorToEncode), (simd_float4*) colorBuffer->contents()); + } + + argumentBuffer->didModifyRange(NS::Range(0, argumentBuffer->length())); + colorBuffer->didModifyRange(NS::Range(0, colorBuffer->length())); + + fPassArgumentBuffers.push_back(argumentBuffer); + fPassColors.push_back(colorBuffer); + + fPassIndices.push_back(iCurrMat); + fPassLengths.push_back(j - iCurrMat); + fNumPasses++; + +#if 0 + ISetFogParameters(fMaterial->GetLayer(iCurrMat)); +#endif + } +} + +const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerInterface* layer) +{ + hsGMatState state; + state.Composite(layer->GetState(), fPipeline->GetMaterialOverride(true), fPipeline->GetMaterialOverride(false)); + return state; +} + +void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap) +{ + fPipeline->CheckTextureRef(layer); + plBitmap* texture = layer->GetTexture(); + + if (texture != nullptr && encoder) { + plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { + 
encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer); + } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { + encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer); + } + + } else { + hsColorRGBA preshadeColor = layer->GetPreshadeColor(); + colorMap[offsetFromRootLayer].r = preshadeColor.r; + colorMap[offsetFromRootLayer].g = preshadeColor.g; + colorMap[offsetFromRootLayer].b = preshadeColor.b; + colorMap[offsetFromRootLayer].a = preshadeColor.a; + } +} + +void plMetalMaterialShaderRef::PopulateFragmentShaderLayerFromLayer(plFragmentShaderLayer *fragmentLayer, plLayerInterface* layer) { + hsGMatState state = ICompositeLayerState(layer); + plBitmap* texture = layer->GetTexture(); + if (texture != nullptr) { + plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { + fragmentLayer->passType = PassTypeCubicTexture; + } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { + fragmentLayer->passType = PassTypeTexture; + } + + } else { + fragmentLayer->passType = PassTypeColor; + } + + fragmentLayer->uvIndex = layer->GetUVWSrc(); + + switch (layer->GetClampFlags()) { + case hsGMatState::kClampTextureU: + fragmentLayer->sampleType = 1; + break; + case hsGMatState::kClampTextureV: + fragmentLayer->sampleType = 2; + break; + case hsGMatState::kClampTexture: + fragmentLayer->sampleType = 3; + break; + default: + fragmentLayer->sampleType = 0; + break; + } + + fragmentLayer->miscFlags = state.fMiscFlags; + fragmentLayer->blendMode = state.fBlendFlags; +} + +uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) +{ + uint32_t currNumLayers = 1; + + plLayerInterface* lay = 
fMaterial->GetLayer(which); + + if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagNoMultitexture)) { + return currNumLayers; + } + + if ((fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (lay->GetMiscFlags() & hsGMatState::kMiscBumpChans)) { + currNumLayers = 2; + return currNumLayers; + } + + if ((lay->GetBlendFlags() & hsGMatState::kBlendNoColor) || + (lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner)) { + return currNumLayers; + } + + int i; + int maxLayers = 8; + if (which + maxLayers > fMaterial->GetNumLayers()) { + maxLayers = fMaterial->GetNumLayers() - which; + } + + for (i = currNumLayers; i < maxLayers; i++) { + plLayerInterface* lay = fMaterial->GetLayer(which + i); + + // Ignoring max UVW limit + + if ((lay->GetMiscFlags() & hsGMatState::kMiscBindNext) && (i+1 >= maxLayers)) { + break; + } + + if (lay->GetMiscFlags() & hsGMatState::kMiscRestartPassHere) { + break; + } + + if (!(fMaterial->GetLayer(which + i - 1)->GetMiscFlags() & hsGMatState::kMiscBindNext) && !ICanEatLayer(lay)) { + break; + } + + currNumLayers++; + } + + return currNumLayers; +} + +bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay) +{ + if (!lay->GetTexture()) { + return false; + } + + if ((lay->GetBlendFlags() & hsGMatState::kBlendNoColor) || + (lay->GetBlendFlags() & hsGMatState::kBlendAddColorTimesAlpha) || + (lay->GetMiscFlags() & hsGMatState::kMiscTroubledLoner)) { + return false; + } + + if ((lay->GetBlendFlags() & hsGMatState::kBlendAlpha) && (lay->GetAmbientColor().a < 1.f)) { + return false; + } + + if (!(lay->GetZFlags() & hsGMatState::kZNoZWrite)) { + return false; + } + + return true; +} + +uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform) +{ + if (!fMaterial || layer >= fMaterial->GetNumLayers() || !fMaterial->GetLayer(layer)) { + return -1; + 
} + + if (false /*ISkipBumpMap(fMaterial, layer)*/) { + return -1; + } + + // Ignoring the bit about ATI Radeon and UVW limits + + if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagNoDecals) && (fMaterial->GetCompositeFlags() & hsGMaterial::kCompDecal)) { + return -1; + } + + // Ignoring the bit about self-rendering cube maps + + plLayerInterface* currLay = /*IPushOverBaseLayer*/ fMaterial->GetLayer(layer); + preEncodeTransform(currLay, 0); + + if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (currLay->GetMiscFlags() & hsGMatState::kMiscBumpDu)) { + currLay = fMaterial->GetLayer(++layer); + } + + //currLay = IPushOverAllLayer(currLay); + + hsGMatState state = ICompositeLayerState(currLay); + + if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagDisableSpecular)) { + state.fShadeFlags &= ~hsGMatState::kShadeSpecular; + } + + // Stuff about ZInc + + if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagNoAlphaBlending)) { + state.fBlendFlags &= ~hsGMatState::kBlendMask; + } + + if ((fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (state.fMiscFlags & hsGMatState::kMiscBumpChans) ) { + switch (state.fMiscFlags & hsGMatState::kMiscBumpChans) + { + case hsGMatState::kMiscBumpDu: + break; + case hsGMatState::kMiscBumpDv: + if (!(fMaterial->GetLayer(layer-2)->GetBlendFlags() & hsGMatState::kBlendAdd)) + { + state.fBlendFlags &= ~hsGMatState::kBlendMask; + state.fBlendFlags |= hsGMatState::kBlendMADD; + } + break; + case hsGMatState::kMiscBumpDw: + if (!(fMaterial->GetLayer(layer-1)->GetBlendFlags() & hsGMatState::kBlendAdd)) + { + state.fBlendFlags &= ~hsGMatState::kBlendMask; + state.fBlendFlags |= hsGMatState::kBlendMADD; + } + break; + default: + break; + } + } + + uint32_t currNumLayers = ILayersAtOnce(layer); + + if (state.fMiscFlags & (hsGMatState::kMiscBumpDu | hsGMatState::kMiscBumpDw)) { + //ISetBumpMatrices(currLay); + } + + PopulateFragmentShaderLayerFromLayer(&uniforms->layers[0], currLay); + + 
uniforms->layerCount++; + + postEncodeTransform(currLay, 0); + + int32_t i = 1; + for (i = 1; i < currNumLayers; i++) + { + + plLayerInterface* layPtr = fMaterial->GetLayer(layer + i); + if (!layPtr) { + return -1; + } + preEncodeTransform(layPtr, i); + + PopulateFragmentShaderLayerFromLayer(&uniforms->layers[i], layPtr); + + uniforms->layerCount++; + + postEncodeTransform(layPtr, i); + } + + if(piggybacks) { + for (int32_t currPiggyback = 0; currPiggyback < piggybacks->size(); currPiggyback++) + { + + plLayerInterface* layPtr = piggybacks->at(currPiggyback); + if (!layPtr) { + return -1; + } + preEncodeTransform(layPtr, i + currPiggyback); + + PopulateFragmentShaderLayerFromLayer(&uniforms->layers[i + currPiggyback], layPtr); + + uniforms->layerCount++; + + postEncodeTransform(layPtr, i + currPiggyback); + } + } + + if (state.fBlendFlags & (hsGMatState::kBlendTest | hsGMatState::kBlendAlpha | hsGMatState::kBlendAddColorTimesAlpha) && + !(state.fBlendFlags & hsGMatState::kBlendAlphaAlways)) + { + // AlphaTestHigh is used for reducing sort artifacts on textures that + // are mostly opaque or transparent, but have regions of translucency + // in transition. Like a texture for a bush billboard. It lets there be + // some transparency falloff, but quit drawing before it gets so + // transparent that draw order problems (halos) become apparent. 
+ if (state.fBlendFlags & hsGMatState::kBlendAlphaTestHigh) { + uniforms->alphaThreshold = 64.f/255.f; + } else { + uniforms->alphaThreshold = 0.00000000001f; + } + } else { + uniforms->alphaThreshold = 0.f; + } + + return layer + currNumLayers; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h new file mode 100644 index 0000000000..f88e313d3f --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -0,0 +1,110 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. 
Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalMaterialShaderRef_inc_ +#define _plMetalMaterialShaderRef_inc_ + +#include "hsGMatState.h" +#include "plMetalDeviceRef.h" +#include "ShaderTypes.h" + +#include + +class hsGMaterial; +class plMetalPipeline; +class plLayerInterface; + +class plMetalMaterialShaderRef : public plMetalDeviceRef +{ +protected: + plMetalPipeline* fPipeline; + hsGMaterial* fMaterial; + //temporary holder for the fragment shader to use, we don't own this reference + MTL::Function* fFragFunction; +private: + std::vector fPassIndices; + //FIXME: This should be retained/released + MTL::Device* fDevice; + std::vector fPassArgumentBuffers; + std::vector fPassColors; + +public: + void Link(plMetalMaterialShaderRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalMaterialShaderRef* GetNext() { return (plMetalMaterialShaderRef*)fNext; } + + plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline *pipe); + ~plMetalMaterialShaderRef(); + + void Release(); + void CheckMateralRef(); + + size_t GetNumPasses() const { return fNumPasses; } + size_t GetPassIndex(size_t which) const { return fPassIndices[which]; } + + void EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); + void FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass); + //probably not a good idea to call prepareTextures directly + //mostly just a hack to keep plates working for now + void prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass); + std::vector fPassLengths; 
+ + // Set the current Plasma state based on the input layer state and the material overrides. + // fMatOverOn overrides to set a state bit whether it is set in the layer or not. + // fMatOverOff overrides to clear a state bit whether it is set in the layer or not.s + const hsGMatState ICompositeLayerState(const plLayerInterface* layer); + + static plLayerInterface* Passthrough(plLayerInterface* layer, uint32_t index) { + return layer; + } +private: + void ILoopOverLayers(); + + uint32_t fNumPasses; + uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform); + bool ICanEatLayer(plLayerInterface* lay); + uint32_t ILayersAtOnce(uint32_t which); + + void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap); + void PopulateFragmentShaderLayerFromLayer(plFragmentShaderLayer *fragmentLayer, plLayerInterface* layer); + void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); +}; + +#endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp new file mode 100644 index 0000000000..2a9a4b415d --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -0,0 +1,4343 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "HeadSpin.h" +#include + +#include +#import + +#include "plQuality.h" + +#include "plMetalPipeline.h" +#include "plMetalMaterialShaderRef.h" +#include "plMetalPlateManager.h" + +#include "hsTimer.h" +#include "plPipeDebugFlags.h" +#include "plPipeResReq.h" + +#include "pnNetCommon/plNetApp.h" // for dbg logging +#include "pnMessage/plPipeResMakeMsg.h" +#include "plAvatar/plAvatarClothing.h" +#include "plDrawable/plDrawableSpans.h" +#include "plDrawable/plGBufferGroup.h" +#include "plGImage/plMipmap.h" +#include "plGLight/plLightInfo.h" +#include "plPipeline/plCubicRenderTarget.h" +#include "plPipeline/plDebugText.h" +#include "plPipeline/plDynamicEnvMap.h" +#include "plScene/plRenderRequest.h" +#include "plSurface/hsGMaterial.h" +#include "plSurface/plLayer.h" +#include "pfCamera/plVirtualCamNeu.h" +#include "plMessage/plDeviceRecreateMsg.h" +#include "plgDispatch.h" +#include "plDrawable/plAuxSpan.h" +#include "plSurface/plLayerShadowBase.h" + +#include "plGImage/plMipmap.h" +#include "plGImage/plCubicEnvironmap.h" + +#include "plGLight/plShadowSlave.h" +#include "plGLight/plShadowCaster.h" + +#include "plTweak.h" + +#include "plMetalVertexShader.h" +#include "plMetalFragmentShader.h" + +#include "hsGMatState.inl" + +#include "plProfile.h" + +plProfile_CreateCounter("Feed Triangles", "Draw", DrawFeedTriangles); +plProfile_CreateCounter("Draw Prim Static", "Draw", DrawPrimStatic); +plProfile_CreateMemCounter("Total Texture Size", "Draw", TotalTexSize); +plProfile_CreateCounter("Layer Change", "Draw", LayChange); +plProfile_Extern(DrawTriangles); +plProfile_Extern(MatChange); + +plProfile_CreateTimer("PrepShadows", "PipeT", PrepShadows); +plProfile_CreateTimer("PrepDrawable", "PipeT", PrepDrawable); +plProfile_CreateTimer(" Skin", "PipeT", Skin); +plProfile_CreateTimer("RenderSpan", "PipeT", RenderSpan); +plProfile_CreateTimer(" MergeCheck", "PipeT", MergeCheck); +plProfile_CreateTimer(" MergeSpan", 
"PipeT", MergeSpan);
+plProfile_CreateTimer(" SpanTransforms", "PipeT", SpanTransforms);
+plProfile_CreateTimer(" SpanFog", "PipeT", SpanFog);
+plProfile_CreateTimer(" SelectLights", "PipeT", SelectLights);
+plProfile_CreateTimer(" SelectProj", "PipeT", SelectProj);
+plProfile_CreateTimer(" CheckDyn", "PipeT", CheckDyn);
+plProfile_CreateTimer(" CheckStat", "PipeT", CheckStat);
+plProfile_CreateTimer(" RenderBuff", "PipeT", RenderBuff);
+plProfile_CreateTimer(" RenderPrim", "PipeT", RenderPrim);
+plProfile_CreateTimer("PlateMgr", "PipeT", PlateMgr);
+plProfile_CreateTimer("DebugText", "PipeT", DebugText);
+plProfile_CreateTimer("Reset", "PipeT", Reset);
+
+plProfile_CreateCounterNoReset("Reload", "PipeC", PipeReload);
+plProfile_CreateCounter("AvRTPoolUsed", "PipeC", AvRTPoolUsed);
+plProfile_CreateCounter("AvRTPoolCount", "PipeC", AvRTPoolCount);
+plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes);
+plProfile_CreateCounter("AvRTShrinkTime", "PipeC", AvRTShrinkTime);
+plProfile_CreateCounter("NumSkin", "PipeC", NumSkin);
+
+// Draw functor: captures the vertex/index range of one span and, when
+// RenderPrims() is called, issues a single indexed triangle draw on the
+// device's current render command encoder.
+class plRenderTriListFunc : public plRenderPrimFunc
+{
+protected:
+    plMetalDevice* fDevice;
+    int fBaseVertexIndex;
+    int fVStart;
+    int fVLength;
+    int fIStart;    // first index to draw; converted to a byte offset in RenderPrims()
+    int fNumTris;   // passed to drawIndexedPrimitives() as the index count
+public:
+    plRenderTriListFunc(plMetalDevice* device, int baseVertexIndex,
+                        int vStart, int vLength, int iStart, int iNumTris)
+        : fDevice(device), fBaseVertexIndex(baseVertexIndex), fVStart(vStart), fVLength(vLength), fIStart(iStart), fNumTris(iNumTris) {}
+
+    bool RenderPrims() const override;
+};
+
+// Uploads the pipeline's current per-pass uniforms to both shader stages and
+// draws fNumTris 16-bit indices from the device's current index buffer,
+// starting fIStart indices into it.
+bool plRenderTriListFunc::RenderPrims() const
+{
+    plProfile_IncCount(DrawFeedTriangles, fNumTris);
+    plProfile_IncCount(DrawTriangles, fNumTris);
+    plProfile_Inc(DrawPrimStatic);
+
+
+    fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState);
+    fDevice->CurrentRenderCommandEncoder()->setFragmentBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState);
+    fDevice->CurrentRenderCommandEncoder()->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, fNumTris, MTL::IndexTypeUInt16, fDevice->fCurrentIndexBuffer, (sizeof(uint16_t) * fIStart));
+
+    // The function previously fell off the end without returning a value,
+    // which is undefined behaviour for a non-void function.
+    // NOTE(review): `false` assumes the plRenderPrimFunc convention is
+    // "true == error"; the callers in this file ignore the result. Confirm.
+    return false;
+}
+
+
+
+// Builds the plate render pipeline state from the default shader library,
+// sets the layer/piggyback limits and quality capability, creates the plate
+// manager, allocates the zeroed per-pass uniform block and creates the shared
+// render target pools.
+plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), fPipelineState(nullptr), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr)
+{
+    fCurrLayerIdx = 0;
+    fDevice.fPipeline = this;
+
+    //Compile the shaders and link our pipeline
+    MTL::Library *library = fDevice.fMetalDevice->newDefaultLibrary();
+    MTL::Function *fragFunction = library->newFunction(
+        NS::String::string("fragmentShader", NS::ASCIIStringEncoding)
+    );
+    MTL::Function *vertFunction = library->newFunction(
+        NS::String::string("plateVertexShader", NS::ASCIIStringEncoding)
+    );
+    MTL::RenderPipelineDescriptor *descriptor = MTL::RenderPipelineDescriptor::alloc()->init();
+    descriptor->setFragmentFunction(fragFunction);
+    descriptor->setVertexFunction(vertFunction);
+    descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
+
+    // Initialised so the pointer is never indeterminate if it is inspected
+    // after the call.
+    NS::Error *error = nullptr;
+    fPipelineState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error);
+    library->release();
+    fragFunction->release();
+    vertFunction->release();
+    descriptor->release();
+
+    fMaxLayersAtOnce = 8;
+
+    // Alloc half our simultaneous textures to piggybacks.
+    // Won't hurt us unless we try too many things at once.
+    fMaxPiggyBacks = fMaxLayersAtOnce >> 1;
+
+    // Less than 4 layers at once means we have to fallback on uv bumpmapping
+    if (fMaxLayersAtOnce < 4)
+        SetDebugFlag(plPipeDbg::kFlagBumpUV, true);
+    //plDynamicCamMap::SetCapable(false);
+    plQuality::SetQuality(fDefaultPipeParams.VideoQuality);
+    //plQuality::SetCapability(fDefaultPipeParams.VideoQuality);
+    plQuality::SetCapability(plQuality::kPS_3);
+    //plShadowCaster::EnableShadowCast(false);
+
+    fPlateMgr = new plMetalPlateManager(this);
+
+    // Zero-filled uniform block; released with free() in the destructor.
+    fCurrentRenderPassUniforms = (VertexUniforms *) calloc(sizeof(VertexUniforms), sizeof(char));
+
+    //FIXME: Add ICreateDynDeviceObjects like DX
+    // RenderTarget pools are shared for our shadow generation algorithm.
+    // Different sizes for different resolutions.
+    IMakeRenderTargetPools();
+}
+
+// Releases the plate manager's geometry and the uniform block allocated by
+// the constructor.
+plMetalPipeline::~plMetalPipeline()
+{
+    if (plMetalPlateManager* pm = static_cast<plMetalPlateManager*>(fPlateMgr))
+    {
+        pm->IReleaseGeometry();
+    }
+
+    // Allocated with calloc() in the constructor and previously never freed.
+    // NOTE(review): assumes nothing outside this chunk frees it - confirm.
+    free(fCurrentRenderPassUniforms);
+    fCurrentRenderPassUniforms = nullptr;
+}
+
+// Fills visList with the spans of `drawable` that are visible in the current
+// view. Returns false when the drawable is not a plDrawableSpans, its type is
+// masked out by the view, or nothing is visible.
+// NOTE(review): the template argument of std::vector is missing from this
+// copy of the patch (angle-bracket text looks stripped); restore it from the
+// declaration in the header.
+bool plMetalPipeline::PreRender(plDrawable *drawable, std::vector &visList, plVisMgr *visMgr)
+{
+    plDrawableSpans *ds = plDrawableSpans::ConvertNoRef(drawable);
+    if (!ds) {
+        return false;
+    }
+
+    if ((ds->GetType() & fView.GetDrawableTypeMask()) == 0) {
+        return false;
+    }
+
+    fView.GetVisibleSpans(ds, visList, visMgr);
+
+    return visList.size() > 0;
+}
+
+// Per-drawable preparation before RenderSpans: lighting check, optional face
+// sort, the drawable's own PrepForRender, then software skinning.
+bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector &visList, plVisMgr *visMgr)
+{
+    plProfile_BeginTiming(PrepDrawable);
+
+    plDrawableSpans* ice = plDrawableSpans::ConvertNoRef(drawable);
+    if (!ice) {
+        plProfile_EndTiming(PrepDrawable);
+        return false;
+    }
+
+    // Find our lights
+    ICheckLighting(ice, visList, visMgr);
+
+    // Sort our faces
+    if (ice->GetNativeProperty(plDrawable::kPropSortFaces)) {
+        ice->SortVisibleSpans(visList, this);
+    }
+
+    // Prep for render. This gives the drawable a chance to
+    // do any last minute updates for its buffers, including
+    // generating particle tri lists.
+ ice->PrepForRender(this); + + // Any skinning necessary + if (!ISoftwareVertexBlend(ice, visList)) { + plProfile_EndTiming(PrepDrawable); + return false; + } + + // Other stuff that we're ignoring for now... + + plProfile_EndTiming(PrepDrawable); + + return true; +} + +plTextFont *plMetalPipeline::MakeTextFont(char *face, uint16_t size) { return nullptr; } + +bool plMetalPipeline::OpenAccess(plAccessSpan &dst, plDrawableSpans *d, const plVertexSpan *span, bool readOnly) { return false; } + +bool plMetalPipeline::CloseAccess(plAccessSpan &acc) { return false; } + +void plMetalPipeline::PushRenderRequest(plRenderRequest *req) +{ + // Save these, since we want to copy them to our current view + hsMatrix44 l2w = fView.GetLocalToWorld(); + hsMatrix44 w2l = fView.GetWorldToLocal(); + + plFogEnvironment defFog = fView.GetDefaultFog(); + + fViewStack.push(fView); + + SetViewTransform(req->GetViewTransform()); + + PushRenderTarget(req->GetRenderTarget()); + fView.fRenderState = req->GetRenderState(); + + fView.fRenderRequest = req; + hsRefCnt_SafeRef(fView.fRenderRequest); + + SetDrawableTypeMask(req->GetDrawableMask()); + SetSubDrawableTypeMask(req->GetSubDrawableMask()); + + float depth = req->GetClearDepth(); + fView.SetClear(&req->GetClearColor(), &depth); + + if (req->GetOverrideMat()) { + PushOverrideMaterial(req->GetOverrideMat()); + } + + // Set from our saved ones... + fView.SetWorldToLocal(w2l); + fView.SetLocalToWorld(l2w); + + RefreshMatrices(); + + if (req->GetIgnoreOccluders()) { + fView.SetMaxCullNodes(0); + } + + fView.fCullTreeDirty = true; +} + +void plMetalPipeline::PopRenderRequest(plRenderRequest *req) +{ + if (req->GetOverrideMat()) { + PopOverrideMaterial(nil); + } + + //new render target means we can't use the previous pipeline state + //it won't be set yet on the new target + //in theory we could have a stack of these so when we unwind we + //could get the state back. 
+ fCurrentPipelineState = nullptr; + fCurrentDepthStencilState = nullptr; + + fCurrentPipelineState = nullptr; + + hsRefCnt_SafeUnRef(fView.fRenderRequest); + fView = fViewStack.top(); + fViewStack.pop(); + + PopRenderTarget(); + fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera; +} + +void plMetalPipeline::ClearRenderTarget(plDrawable *d) +{ + plDrawableSpans* src = plDrawableSpans::ConvertNoRef(d); + + if( !src ) + { + ClearRenderTarget(); + return; + } + + Draw(d); +} + +void plMetalPipeline::ClearRenderTarget(const hsColorRGBA *col, const float *depth) +{ + if (fView.fRenderState & (kRenderClearColor | kRenderClearDepth)) { + hsColorRGBA clearColor = col ? *col : GetClearColor(); + float clearDepth = depth ? *depth : fView.GetClearDepth(); + //FIXME: Depth and color are always cleared, do we need to implement handling for not clearing them? + + fDevice.SetClearColor({clearColor.r, clearColor.g, clearColor.b, clearColor.a}); + } +} + +hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) +{ + plMetalRenderTargetRef* ref = nullptr; + MTL::Texture *depthBuffer = nullptr; + plCubicRenderTarget *cubicRT; + + // If we have Shader Model 3 and support non-POT textures, let's make reflections the pipe size +#if 1 + if (plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner)) { + //if ((plQuality::GetCapability() > plQuality::kPS_2) && fSettings.fD3DCaps & kCapsNpotTextures) + camMap->ResizeViewport(IGetViewTransform()); + } +#endif + + /// Check--is this renderTarget really a child of a cubicRenderTarget? + if (owner->GetParent()) { + /// This'll create the deviceRefs for all of its children as well + MakeRenderTargetRef(owner->GetParent()); + return owner->GetDeviceRef(); + } + + // If we already have a rendertargetref, we just need it filled out with D3D resources. + if (owner->GetDeviceRef()) + ref = (plMetalRenderTargetRef*)owner->GetDeviceRef(); + + /// Create the render target now + // Start with the depth surface. 
+    // Note that we only ever give a cubic rendertarget a single shared depth buffer,
+    // since we only render one face at a time. If we were rendering part of face X, then part
+    // of face Y, then more of face X, then they would all need their own depth buffers.
+    if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) {
+        MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8,
+                                                                                                     owner->GetWidth(),
+                                                                                                     owner->GetHeight(),
+                                                                                                     false);
+        // Memoryless storage on unified-memory devices, private storage otherwise.
+        if(fDevice.fMetalDevice->hasUnifiedMemory()) {
+            depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless);
+        } else {
+            depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate);
+        }
+        depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget);
+        depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor);
+    }
+
+
+    // See if it's a cubic render target.
+    // Primary consumer here is the vertex/pixel shader water.
+    cubicRT = plCubicRenderTarget::ConvertNoRef( owner );
+    if( cubicRT )
+    {
+        if (!ref)
+            ref = new plMetalRenderTargetRef();
+
+        MTL::TextureDescriptor *textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), false);
+        // PixelFormatView usage is requested because each face below is a
+        // texture view onto one slice of this cube texture.
+        textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView);
+        textureDescriptor->setStorageMode(MTL::StorageModePrivate);
+
+        // NOTE(review): `device` is not used again in the lines visible here.
+        plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef();
+        MTL::Texture * texture = fDevice.fMetalDevice->newTexture(textureDescriptor);
+
+        /// Create a CUBIC texture
+        for( int i = 0; i < 6; i++ )
+        {
+            plRenderTarget *face = cubicRT->GetFace( i );
+            plMetalRenderTargetRef *fRef;
+
+            if( face->GetDeviceRef() != nil )
+            {
+                fRef = (plMetalRenderTargetRef *)face->GetDeviceRef();
+                if( !fRef->IsLinked() )
+                    fRef->Link( &fRenderTargetRefList );
+            }
+            else
+            {
+                fRef = new plMetalRenderTargetRef();
+                fRef->SetDirty(true);
+
+                face->SetDeviceRef(fRef);
+                ( (plMetalRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList );
+                // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
+                hsRefCnt_SafeUnRef( face->GetDeviceRef() );
+            }
+            if(fRef->IsDirty()) {
+                // Maps face index i (named in the trailing comments) to the
+                // cube texture slice used for that face's texture view.
+                static const uint kFaceMapping[] = {
+                    1, // kLeftFace
+                    0, // kRightFace
+                    4, // kFrontFace
+                    5, // kBackFace
+                    2, // kTopFace
+                    3  // kBottomFace
+                };
+
+                if(fRef->fTexture) {
+                    fRef->fTexture->release();
+                    fRef->fTexture = nullptr;
+                }
+
+                if(fRef->fDepthBuffer) {
+                    fRef->fDepthBuffer->release();
+                    fRef->fDepthBuffer = nullptr;
+                }
+
+                fRef->fTexture = texture->newTextureView(MTL::PixelFormatBGRA8Unorm, MTL::TextureType2D, NS::Range::Make(0, 1), NS::Range::Make(kFaceMapping[i], 1));
+                // Since the depth buffer is shared, each render target gets its own retain.
+                // NOTE(review): depthBuffer is still nullptr here when the
+                // depth-surface branch above was skipped - confirm that is intended.
+                fRef->fDepthBuffer = depthBuffer->retain();
+                fRef->SetDirty(false);
+            }
+        }
+
+        //if the ref already has an old texture, release it
+        if(ref->fTexture)
+            ref->fTexture->release();
+        if(ref->fDepthBuffer)
+            ref->fDepthBuffer->release();
+        ref->fTexture = texture;
+        ref->fDepthBuffer = depthBuffer;
+        ref->fOwner = owner;
+
+        // Keep it in a linked list for ready destruction.
+        if (owner->GetDeviceRef() != ref) {
+            owner->SetDeviceRef(ref);
+            // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
+            hsRefCnt_SafeUnRef(ref);
+            if (ref != nullptr && !ref->IsLinked())
+                ref->Link(&fRenderTargetRefList);
+        } else {
+            if (ref != nullptr && !ref->IsLinked())
+                ref->Link(&fRenderTargetRefList);
+        }
+
+        return ref;
+    }
+    else if (owner->GetFlags() & plRenderTarget::kIsTexture) {
+        // Plain 2D texture target: one BGRA8 colour texture plus the depth
+        // buffer created above (if any).
+        if (!ref)
+            ref = new plMetalRenderTargetRef();
+
+        MTL::TextureDescriptor *textureDescriptor = MTL::TextureDescriptor::alloc()->init();
+        textureDescriptor->setWidth(owner->GetWidth());
+        textureDescriptor->setHeight(owner->GetHeight());
+        textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm);
+        textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead);
+        textureDescriptor->setStorageMode(MTL::StorageModePrivate);
+
+        // NOTE(review): `device` is not used again in the lines visible here.
+        plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef();
+        MTL::Texture * texture = fDevice.fMetalDevice->newTexture(textureDescriptor);
+        textureDescriptor->release();
+
+        //if the ref already has an old texture, release it
+        if(ref->fTexture)
+            ref->fTexture->release();
+        if(ref->fDepthBuffer)
+            ref->fDepthBuffer->release();
+        ref->fTexture = texture;
+        ref->fDepthBuffer = depthBuffer;
+        ref->fOwner = owner;
+
+        // Keep it in a linked list for ready destruction.
+        if (owner->GetDeviceRef() != ref) {
+            owner->SetDeviceRef(ref);
+            // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
+            hsRefCnt_SafeUnRef(ref);
+            if (ref != nullptr && !ref->IsLinked())
+                ref->Link(&fRenderTargetRefList);
+        } else {
+            if (ref != nullptr && !ref->IsLinked())
+                ref->Link(&fRenderTargetRefList);
+        }
+
+        return ref;
+    }
+
+    // Not a texture either, must be a plain offscreen.
+    // Offscreen isn't currently used for anything.
+    else if (owner->GetFlags() & plRenderTarget::kIsOffscreen) {
+        /// Create a blank surface
+        //if (ref)
+        //    ref->Set(surfFormat, 0, owner);
+        //else
+        //    ref = new plGLRenderTargetRef(surfFormat, 0, owner);
+        // NOTE(review): a depthBuffer created above is not stored or released
+        // on this path - confirm whether it can be reached with one allocated.
+    }
+
+    // Keep it in a linked list for ready destruction.
+    if (owner->GetDeviceRef() != ref) {
+        owner->SetDeviceRef(ref);
+        // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least)
+        hsRefCnt_SafeUnRef(ref);
+        if (ref != nullptr && !ref->IsLinked())
+            ref->Link(&fRenderTargetRefList);
+    } else {
+        if (ref != nullptr && !ref->IsLinked())
+            ref->Link(&fRenderTargetRefList);
+    }
+
+    // Mark as not dirty so it doesn't get re-created
+    if (ref != nullptr)
+        ref->SetDirty(false);
+
+    return ref;
+}
+
+// Starts a render pass. The outermost call (fInSceneDepth going from 0 to 1)
+// also begins the device frame, advances the vertex-buffer frame time, runs
+// the shadow and avatar-texture pre-passes and opens a command buffer for the
+// drawable supplied by currentDrawableCallback.
+bool plMetalPipeline::BeginRender()
+{
+    // offset transform
+    RefreshScreenMatrices();
+
+    fCurrentPipelineState = nullptr;
+
+    // offset transform
+    // NOTE(review): second RefreshScreenMatrices() call in a row - looks redundant, confirm.
+    RefreshScreenMatrices();
+
+    // If this is the primary BeginRender, make sure we're really ready.
+    if (fInSceneDepth++ == 0) {
+        /// If we have a renderTarget active, use its viewport
+        fDevice.SetViewport();
+
+        fDevice.BeginRender();
+
+        fVtxRefTime++;
+        plMetalBufferPoolRef::SetFrameTime(fVtxRefTime);
+
+        // Render any shadow maps that have been submitted for this frame.
+        IPreprocessShadows();
+        IPreprocessAvatarTextures();
+
+        CA::MetalDrawable *drawable = currentDrawableCallback();
+        // NOTE(review): this early exit returns the same value as the normal
+        // path and leaves fInSceneDepth incremented - confirm callers cope.
+        if(!drawable) {
+            return false;
+        }
+        fDevice.CreateNewCommandBuffer(drawable);
+        drawable->release();
+    }
+
+    // Set to a value outside the enum, presumably so the first cull-mode
+    // request of the pass never matches the cached mode - TODO confirm.
+    fCurrentCullMode = MTL::CullMode(-1);
+
+    fRenderCnt++;
+
+    // Would probably rather this be an input.
+    fTime = hsTimer::GetSysSeconds();
+
+    return false;
+}
+
+// Ends a render pass. The outermost call submits the command buffer, drops
+// the cached pipeline/depth-stencil state and clears the shadow slaves. Every
+// call releases the current material and the layer refs. Always returns false.
+bool plMetalPipeline::EndRender()
+{
+    bool retVal = false;
+
+    if (--fInSceneDepth == 0) {
+        fDevice.SubmitCommandBuffer();
+        fCurrentPipelineState = nullptr;
+        fCurrentDepthStencilState = nullptr;
+
+        IClearShadowSlaves();
+    }
+
+    // Do this last, after we've drawn everything
+    // Just letting go of things we're done with for the frame.
+    hsRefCnt_SafeUnRef(fCurrMaterial);
+    fCurrMaterial = nullptr;
+
+    // 8 matches the fMaxLayersAtOnce value set in the constructor.
+    for (int i = 0; i < 8; i++) {
+        if (fLayerRef[i]) {
+            hsRefCnt_SafeUnRef(fLayerRef[i]);
+            fLayerRef[i] = nullptr;
+        }
+    }
+
+    return retVal;
+}
+
+// Draws the screen-space overlays: the cull proxy (if the view has one), the
+// plates and the debug text. Plates are drawn with wireframe forced off and
+// white shading forced on. If anything was drawn, all view transforms are
+// flagged for reset.
+void plMetalPipeline::RenderScreenElements() {
+    bool reset = false;
+
+    if (fView.HasCullProxy())
+    {
+        Draw(fView.GetCullProxy());
+    }
+
+
+    hsGMatState tHack = PushMaterialOverride(hsGMatState::kMisc, hsGMatState::kMiscWireFrame, false);
+    hsGMatState ambHack = PushMaterialOverride(hsGMatState::kShade, hsGMatState::kShadeWhite, true);
+
+    plProfile_BeginTiming(PlateMgr);
+    // Plates
+    if (fPlateMgr)
+    {
+        fPlateMgr->DrawToDevice(this);
+        reset = true;
+    }
+    plProfile_EndTiming(PlateMgr);
+
+    PopMaterialOverride(ambHack, true);
+    PopMaterialOverride(tHack, false);
+
+    plProfile_BeginTiming(DebugText);
+    /// Debug text
+    if (fDebugTextMgr && plDebugText::Instance().IsEnabled())
+    {
+        fDebugTextMgr->DrawToDevice(this);
+        reset = true;
+    }
+    plProfile_EndTiming(DebugText);
+
+    plProfile_BeginTiming(Reset);
+    if (reset)
+    {
+        fView.fXformResetFlags = fView.kResetAll; // Text destroys view transforms
+    }
+    plProfile_EndTiming(Reset);
+}
+
+// Always reports windowed mode.
+bool plMetalPipeline::IsFullScreen() const { return false; }
+
+// Recreates the size-dependent device objects for a new surface size and
+// broadcasts plDeviceRecreateMsg. A width or height of zero keeps the current
+// size and only recreates.
+void plMetalPipeline::Resize(uint32_t width, uint32_t height)
+{
+    /*
+     Resize had a bunch of notes on the DX version about how it was an old function, replaced by ResetDisplayDevice. I'll implement it for now, but consider moving over to ResetDisplayDevice.
+
+     This function is cheaper than resetting the entire display device though.
+     */
+    hsMatrix44 w2c, c2w, proj;
+
+    // Store some states that we *want* to restore back...
+    plViewTransform resetTransform = GetViewTransform();
+
+    // Destroy old
+    //IReleaseDeviceObjects();
+    IReleaseDynDeviceObjects();
+
+    // Reset width and height
+    if( width != 0 && height != 0 )
+    {
+        // Width and height of zero mean just recreate
+        fOrigWidth = width;
+        fOrigHeight = height;
+        IGetViewTransform().SetScreenSize((uint16_t)(fOrigWidth), (uint16_t)(fOrigHeight));
+        resetTransform.SetScreenSize((uint16_t)(fOrigWidth), (uint16_t)(fOrigHeight));
+    }
+    else
+    {
+        // Just for debug
+        hsStatusMessage( "Recreating the pipeline...\n" );
+    }
+
+    // Restore states
+    SetViewTransform(resetTransform);
+    IProjectionMatrixToDevice();
+
+    plVirtualCam1::Refresh();
+
+    ICreateDynDeviceObjects();
+
+    /// Broadcast a message letting everyone know that we were recreated and that
+    /// all device-specific stuff needs to be recreated
+    plDeviceRecreateMsg* clean = new plDeviceRecreateMsg(this);
+    plgDispatch::MsgSend(clean);
+}
+
+// Rebuilds device resources in a fixed order: plate geometry and avatar RT
+// pool are released, render targets are recreated, then plate geometry,
+// sorted index buffers, the avatar RT pool and static vertex buffers.
+// NOTE(review): the template argument of static_cast below is missing from
+// this copy of the patch (angle-bracket text looks stripped).
+void plMetalPipeline::LoadResources()
+{
+    hsStatusMessageF("Begin Device Reload t=%f",hsTimer::GetSeconds());
+    plNetClientApp::StaticDebugMsg("Begin Device Reload");
+
+    if(fFragFunction == nil) {
+        FindFragFunction();
+    }
+
+    if (plMetalPlateManager* pm = static_cast(fPlateMgr))
+        pm->IReleaseGeometry();
+
+    IReleaseAvRTPool();
+
+    // Create all RenderTargets
+    plPipeRTMakeMsg* rtMake = new plPipeRTMakeMsg(this);
+    rtMake->Send();
+
+    if (plMetalPlateManager* pm = static_cast(fPlateMgr))
+        pm->ICreateGeometry();
+
+    // Create all POOL_DEFAULT (sorted) index buffers in the scene.
+    plPipeGeoMakeMsg* defMake = new plPipeGeoMakeMsg(this, true);
+    defMake->Send();
+
+    // This can be a bit of a mem hog and will use more mem if available, so
+    // keep it last in the POOL_DEFAULT allocs.
+    IFillAvRTPool();
+
+    // Force a create of all our static vertex buffers.
+    plPipeGeoMakeMsg* manMake = new plPipeGeoMakeMsg(this, false);
+    manMake->Send();
+
+    // Okay, we've done it, clear the request.
+    plPipeResReq::Clear();
+
+    plProfile_IncCount(PipeReload, 1);
+
+    hsStatusMessageF("End Device Reload t=%f",hsTimer::GetSeconds());
+    plNetClientApp::StaticDebugMsg("End Device Reload");
+}
+
+// Stub: gamma from per-channel exponents is not implemented; returns false.
+bool plMetalPipeline::SetGamma(float eR, float eG, float eB)
+{
+    //FIXME: Implement Gamma
+    return false;
+}
+
+// Stub: gamma from per-channel lookup tables is not implemented; returns false.
+bool plMetalPipeline::SetGamma(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t *const tabB)
+{
+    //FIXME: Implement Gamma
+    return false;
+}
+
+// Stub: screen capture is not implemented; `dest` is left untouched and false is returned.
+bool plMetalPipeline::CaptureScreen(plMipmap *dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight)
+{
+    //FIXME: Screen capture
+    return false;
+}
+
+// Stub: mip map extraction is not implemented; returns nullptr.
+plMipmap *plMetalPipeline::ExtractMipMap(plRenderTarget *targ)
+{
+    //FIXME: Add mip map extraction
+    //find who calls this to test
+    return nullptr;
+}
+
+// Intentionally empty: `res` is not modified. See the design note below.
+// NOTE(review): the template argument of std::vector is missing from this
+// copy of the patch (angle-bracket text looks stripped).
+void plMetalPipeline::GetSupportedDisplayModes(std::vector *res, int ColorDepth)
+{
+    /*
+     There are decisions to make here.
+
+     Modern macOS does not support "display modes." Your panel runs at native resolution at all times, and you can over-render or under-render. But you never set the display mode of the panel, or get the display mode of the panel. Most games have a "scale slider."
+
+     Note: There are legacy APIs for display modes for compatibility with older software. Since we're here writing a new renderer, let's do things the right way. The display mode APIs also have trouble with density, i.e. a 4k display might be reported as a 2k display if the window manager is running in a higher DPI mode.
+
+     The basic approach should be to render at whatever the resolution of our output surface is. We're mostly doing that now (aspect ratio doesn't adjust.)
+
+     Ideally we should support some sort of scaling/semi dynamic renderbuffer resolution thing. But don't mess with the window server's framebuffer size. macOS has accelerated resolution scaling like consoles do. Use that.
+     */
+}
+
+// Stub: reports 0 anisotropic samples.
+int plMetalPipeline::GetMaxAnisotropicSamples()
+{
+    //FIXME: Fix antialiasing
+    return 0;
+}
+
+// Stub: reports 0 anti-aliasing samples for every mode.
+int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth)
+{
+    //FIXME: Fix antialiasing
+    return 0;
+}
+
+// Stub: the requested display settings are ignored.
+void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync)
+{
+    //FIXME: Whats this?
+    //Seems like an entry point for passing in display settings.
+}
+
+// Renders the visible spans of `ice`. Consecutive entries of visList that can
+// be merged are folded into one temporary icicle and drawn with a single
+// IRenderBufferSpan call.
+// NOTE(review): the template arguments of both std::vector types below are
+// missing from this copy of the patch (angle-bracket text looks stripped).
+void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vector &visList)
+{
+    plProfile_BeginTiming(RenderSpan);
+
+    hsMatrix44 lastL2W;
+    size_t i, j;
+    hsGMaterial* material;
+    const std::vector& spans = ice->GetSpanArray();
+
+    //plProfile_IncCount(EmptyList, !visList.GetCount());
+
+    /// Set this (*before* we do our TestVisibleWorld stuff...)
+    lastL2W.Reset();
+    ISetLocalToWorld(lastL2W, lastL2W);     // This is necessary; otherwise, we have to test for
+                                            // the first transform set, since this'll be identity
+                                            // but the actual device transform won't be (unless
+                                            // we do this)
+
+
+    /// Loop through our spans, combining them when possible
+    for (i = 0; i < visList.size(); ) {
+        if (GetOverrideMaterial() != nullptr) {
+            material = GetOverrideMaterial();
+        } else {
+            material = ice->GetMaterial(spans[visList[i]]->fMaterialIdx);
+        }
+
+        /// It's an icicle--do our icicle merge loop
+        plIcicle tempIce(*((plIcicle*)spans[visList[i]]));
+
+        // Start at i + 1, look for as many spans as we can add to tempIce
+        for (j = i + 1; j < visList.size(); j++) {
+            // With an override material active, tempIce takes on the
+            // candidate's material index before the merge test.
+            if (GetOverrideMaterial()) {
+                tempIce.fMaterialIdx = spans[visList[j]]->fMaterialIdx;
+            }
+
+            plProfile_BeginTiming(MergeCheck);
+            if (!spans[visList[j]]->CanMergeInto(&tempIce)) {
+                plProfile_EndTiming(MergeCheck);
+                break;
+            }
+            plProfile_EndTiming(MergeCheck);
+            //plProfile_Inc(SpanMerge);
+
+            plProfile_BeginTiming(MergeSpan);
+            spans[visList[j]]->MergeInto(&tempIce);
+            plProfile_EndTiming(MergeSpan);
+        }
+
+        // Spans without a material are skipped, not drawn.
+        if (material != nullptr) {
+            // First, do we have a device ref at this index?
+            plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef();
+
+            if (mRef == nullptr) {
+                mRef = new plMetalMaterialShaderRef(material, this);
+                material->SetDeviceRef(mRef);
+            }
+
+            if (!mRef->IsLinked()) {
+                mRef->Link(&fMatRefList);
+            }
+
+            hsGDeviceRef* vb = ice->GetVertexRef( tempIce.fGroupIdx, tempIce.fVBufferIdx );
+            // NOTE(review): vRef is not used again in this function.
+            plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb;
+
+            // What do we change?
+
+            plProfile_BeginTiming(SpanTransforms);
+            ISetupTransforms(ice, tempIce, mRef, lastL2W);
+            plProfile_EndTiming(SpanTransforms);
+
+            // Check that the underlying buffers are ready to go.
+            plProfile_BeginTiming(CheckDyn);
+            ICheckDynBuffers(ice, ice->GetBufferGroup(tempIce.fGroupIdx), &tempIce);
+            plProfile_EndTiming(CheckDyn);
+
+            plProfile_BeginTiming(CheckStat);
+            plGBufferGroup* grp = ice->GetBufferGroup(tempIce.fGroupIdx);
+            CheckVertexBufferRef(grp, tempIce.fVBufferIdx);
+            CheckIndexBufferRef(grp, tempIce.fIBufferIdx);
+            plProfile_EndTiming(CheckStat);
+
+            // Draw this span now
+            IRenderBufferSpan( tempIce,
+                               vb,
+                               ice->GetIndexRef( tempIce.fGroupIdx, tempIce.fIBufferIdx ),
+                               material,
+                               tempIce.fVStartIdx, tempIce.fVLength,   // These are used as our accumulated range
+                               tempIce.fIPackedIdx, tempIce.fILength );
+        }
+
+        // Restart our search...
+        i = j;
+    }
+
+    plProfile_EndTiming(RenderSpan);
+    /// All done!
+}
+
+// Sets the local-to-world transform for `span` and copies the device's
+// projection, camera and local/world matrices into the per-pass uniforms.
+// lastL2W carries the most recently set transform between calls, so a span
+// without matrices only triggers a new ISetLocalToWorld when its transform
+// differs. Spans with more than two matrices get an identity transform.
+void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, plMetalMaterialShaderRef* mRef, hsMatrix44& lastL2W)
+{
+    if (span.fNumMatrices) {
+        if (span.fNumMatrices <= 2) {
+            ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal );
+            lastL2W = span.fLocalToWorld;
+        } else {
+            lastL2W.Reset();
+            ISetLocalToWorld( lastL2W, lastL2W );
+            fView.fLocalToWorldLeftHanded = span.fLocalToWorld.GetParity();
+        }
+    } else if (lastL2W != span.fLocalToWorld) {
+        ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal );
+        lastL2W = span.fLocalToWorld;
+    } else {
+        fView.fLocalToWorldLeftHanded = lastL2W.GetParity();
+    }
+
+#if 0 // Skinning
+    if( span.fNumMatrices == 2 )
+    {
+        D3DXMATRIX mat;
+        IMatrix44ToD3DMatrix(mat, drawable->GetPaletteMatrix(span.fBaseMatrix+1));
+        fD3DDevice->SetTransform(D3DTS_WORLDMATRIX(1), &mat);
+        fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_1WEIGHTS);
+    }
+    else
+    {
+        fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE);
+    }
+#endif
+
+    fCurrentRenderPassUniforms->projectionMatrix = fDevice.fMatrixProj;
+    fCurrentRenderPassUniforms->worldToCameraMatrix = fDevice.fMatrixW2C;
+    fCurrentRenderPassUniforms->cameraToWorldMatrix = fDevice.fMatrixC2W;
+    fCurrentRenderPassUniforms->localToWorldMatrix = fDevice.fMatrixL2W;
+    fCurrentRenderPassUniforms->worldToLocalMatrix = fDevice.fMatrixW2L;
+}
+
+// Draws one (possibly merged) span: binds its vertex and index buffers, then
+// renders every material pass, the per-pass projected lights, the aux spans
+// and the shadows cast onto the span.
+void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb,
+                                        hsGDeviceRef* ib, hsGMaterial* material,
+                                        uint32_t vStart, uint32_t vLength,
+                                        uint32_t iStart, uint32_t iLength)
+{
+    plProfile_BeginTiming(RenderBuff);
+
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb;
+    plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)ib;
+    plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef();
+    mRef->CheckMateralRef();
+
+    // NOTE(review): iRef itself is dereferenced without a null check, unlike vRef.
+    if (!vRef || !vRef->GetBuffer() || !iRef->GetBuffer()) {
+        plProfile_EndTiming(RenderBuff);
+
+        hsAssert( false, ST::format("Trying to render a nil buffer pair! (Mat: {})", material->GetKeyName()).c_str() );
+        return;
+    }
+
+    /* Index Buffer stuff and drawing */
+
+    // iLength is forwarded as the functor's iNumTris argument.
+    plRenderTriListFunc render(&fDevice, 0, vStart, vLength, iStart, iLength);
+
+    plProfile_EndTiming(RenderBuff);
+
+#if 1
+    // Enable this for LayerAnimations, but the timing/speed seems wrong
+    for (size_t i = 0; i < material->GetNumLayers(); i++) {
+        plLayerInterface* lay = material->GetLayer(i);
+        if (lay) {
+            lay->Eval(fTime, fFrame, 0);
+        }
+    }
+#endif
+
+    // Turn on this span's lights and turn off the rest.
+ ISelectLights(&span, mRef); + +#ifdef _DEBUG + fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string(material->GetKeyName().c_str(), NS::UTF8StringEncoding)); +#endif + + /* Vertex Buffer stuff */ + if(!vRef->GetBuffer()) { + return; + } + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); + + IPushPiggyBacks(material); + hsRefCnt_SafeAssign(fCurrMaterial, material); + size_t pass; + for (pass = 0; pass < mRef->GetNumPasses(); pass++) { + + if ( IHandleMaterial(material, pass, &span, vRef) ) { + render.RenderPrims(); + } + + //Projection wants to do it's own lighting, push the current lighting state + //so we can keep the same light calculations on the next pass + PushCurrentLightSources(); + + plProfile_BeginTiming(SelectProj); + ISelectLights( &span, mRef, true ); + plProfile_EndTiming(SelectProj); + + // Take care of projections that get applied to each pass. + if( fProjEach.size() && !(fView.fRenderState & kRenderNoProjection) ) + { +#ifdef _DEBUG + fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string("Render projections", NS::UTF8StringEncoding)); +#endif + IRenderProjectionEach(render, material, pass, span, vRef); +#ifdef _DEBUG + fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); +#endif + } + //Revert the light state back to what we had before projections + PopCurrentLightSources(); + + if (IsDebugFlagSet(plPipeDbg::kFlagNoUpperLayers)) + pass = mRef->GetNumPasses(); + } + + IPopPiggyBacks(); + + // Render any aux spans associated. + if( span.GetNumAuxSpans() ) + IRenderAuxSpans(span); + + + + // Only render projections and shadows if we successfully rendered the span. + // j == -1 means we aborted render. + if( pass >= 0 ) + { + // Projections that get applied to the frame buffer (after all passes). 
+ //if( fLights.fProjAll.GetCount() && !(fView.fRenderState & kRenderNoProjection) ) + // IRenderProjections(render); + + // Handle render of shadows onto geometry. + if( fShadows.size() ) { + //if we had to render aux spans, we probably changed the vertex and index buffer + //reset those + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); + + IRenderShadowsOntoSpan(render, &span, material, vRef); + } + + if( !(fView.fRenderState & kRenderNoProjection) ) + { + } + } + +#ifdef _DEBUG + fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); +#endif +} + +// IRenderProjectionEach /////////////////////////////////////////////////////////////////////////////////////// +// Render any lights that are to be projected onto each pass of the object. +void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef) +{ + // If this is a bump map pass, forget it, we've already "done" per-pixel lighting. + //if( fLayerState[iPass].fMiscFlags & (hsGMatState::kMiscBumpLayer | hsGMatState::kMiscBumpChans) ) + // return; + + // Push the LayerShadowBase override. This sets the blend + // to framebuffer as Add/ZNoWrite and AmbientColor = 0. + static plLayerLightBase layLightBase; + + // For each projector: + int k; + for( k = 0; k < fProjEach.size(); k++ ) + { + // Push it's projected texture as a piggyback. + plLightInfo* li = fProjEach[k]; + plMetalMaterialShaderRef *mRef = (plMetalMaterialShaderRef *)material->GetDeviceRef(); + + plLayerInterface* proj = li->GetProjection(); + hsAssert(proj, "A projector with no texture to project?"); + IPushProjPiggyBack(proj); + + // Enable the projecting light only. 
+        // Slot 7 is hard-coded for the projecting light (see the FIXME below).
+        IEnableLight(mRef, 7, li);
+
+        AppendLayerInterface(&layLightBase, false);
+
+        IHandleMaterial( material, iPass, &span, vRef, false );
+
+        //FIXME: Hard setting of light
+        IScaleLight(mRef, 7, true);
+        //mRef->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, iPass, fActivePiggyBacks, &fPiggyBackStack, fOverBaseLayer);
+
+        // Do the render with projection.
+        render.RenderPrims();
+
+        RemoveLayerInterface(&layLightBase, false);
+
+        // Disable the projecting light
+        IDisableLight(mRef, 7);
+
+        // Pop its projected texture off piggyback
+        IPopProjPiggyBacks();
+
+    }
+
+}
+
+
+// ICheckAuxBuffers ///////////////////////////////////////////////////////////////////////
+// The AuxBuffers are associated with drawables for things to be drawn right after that
+// drawable's contents. In particular, see the plDynaDecal, which includes things like
+// water ripples, bullet hits, and footprints.
+// This function just makes sure they are ready to be rendered, called right before
+// the rendering.
+// Returns true when the span has no vertex or index buffer ref, false otherwise.
+bool plMetalPipeline::ICheckAuxBuffers(const plAuxSpan* span)
+{
+    plGBufferGroup* group = span->fGroup;
+
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx);
+    if( !vRef )
+        return true;
+
+    plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx);
+    if( !iRef )
+        return true;
+
+    // If our vertex buffer ref is volatile and the timestamp is off
+    // then it needs to be refilled
+    if( vRef->Expired(fVtxRefTime) )
+    {
+        IRefreshDynVertices(group, vRef);
+    }
+
+    return false; // No error
+}
+
+// IRenderAuxSpans ////////////////////////////////////////////////////////////////////////////
+// Save and restore render state around calls to IRenderAuxSpan. This lets
+// a list of aux spans get rendered with only one save/restore state.
+// Here that means: identity local-to-world while the aux spans are drawn,
+// then the parent span's transform is put back.
+void plMetalPipeline::IRenderAuxSpans(const plSpan& span)
+{
+    if (IsDebugFlagSet(plPipeDbg::kFlagNoAuxSpans))
+        return;
+
+    ISetLocalToWorld(hsMatrix44::IdentityMatrix(), hsMatrix44::IdentityMatrix());
+
+    int i;
+    for( i = 0; i < span.GetNumAuxSpans(); i++ )
+        IRenderAuxSpan(span, span.GetAuxSpan(i));
+
+    ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal);
+
+}
+
+// IRenderAuxSpan //////////////////////////////////////////////////////////
+// Aux spans (auxiliary) are geometry rendered immediately after, and therefore dependent, on
+// other normal geometry. They don't have SceneObjects, Drawables, DrawInterfaces or
+// any of that, and therefore don't correspond to any object in the scene.
+// They are dynamic procedural decals. See plDynaDecal.cpp and plDynaDecalMgr.cpp.
+// This is wrapped by IRenderAuxSpans, which makes sure state is restored to resume
+// normal rendering after the AuxSpan is rendered.
+void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux)
+{
+    // Make sure the underlying resources are created and filled in with current data.
+    CheckVertexBufferRef(aux->fGroup, aux->fVBufferIdx);
+    CheckIndexBufferRef(aux->fGroup, aux->fIBufferIdx);
+    ICheckAuxBuffers(aux);
+
+    // Set to render from the aux spans buffers.
+    plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)aux->fGroup->GetVertexBufferRef(aux->fVBufferIdx);
+
+    if( !vRef )
+        return;
+
+    plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)aux->fGroup->GetIndexBufferRef(aux->fIBufferIdx);
+
+    if( !iRef )
+        return;
+
+
+    // Now just loop through the aux material, rendering in as many passes as it takes.
+ hsGMaterial* material = aux->fMaterial; + plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + + if (mRef == nullptr) { + mRef = new plMetalMaterialShaderRef(material, this); + material->SetDeviceRef(mRef); + } + + /* Vertex Buffer stuff */ + if(!vRef->GetBuffer()) { + return; + } + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); + + plRenderTriListFunc render(&fDevice, 0, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength); + + size_t pass; + for (pass = 0; pass < mRef->GetNumPasses(); pass++) { + //IHandleMaterial(material, pass, &span, vRef); +#if 1 + plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); + fCurrLayerIdx = mRef->GetPassIndex(pass); + + ICalcLighting(mRef, lay, &span); + + hsGMatState s; + s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + + /* + If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer. + This can happen with some models like the fire marbles in the neighborhood that have some models + for physics only, and then can block other rendering in the Z buffer. + DX pipeline does this in ILoopOverLayers. 
+ */ + if( (s.fBlendFlags & hsGMatState::kBlendAlpha) + &&lay->GetOpacity() <= 0 + &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { + continue; + } + + IHandleZMode(s); + IHandleBlendMode(s); + + if (s.fMiscFlags & hsGMatState::kMiscTwoSided) { + if(fCurrentCullMode != MTL::CullModeNone) { + fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); + fCurrentCullMode = MTL::CullModeNone; + } + } else { + ISetCullMode(); + } + + if(lay->GetVertexShader()) { + //pure shader path + plShader *vertexShader = lay->GetVertexShader(); + plShader *fragShader = lay->GetPixelShader(); + + fCurrLay = lay; + fCurrNumLayers = mRef->fPassLengths[pass]; + + ISetShaders(vRef, s, vertexShader, fragShader); + + //FIXME: Programmable pipeline does not implement the full feature set + /* + The programmable pipeline doesn't do things like set the texture transform matrices, + In practice, the transforms aren't set and used. Does it matter that the Metal + implementation doesn't implemention the full inputs the DX version gets? + + If it is implemented, the same checks the DX version does should be also implemented. + DX will set texture transforms, but then turn them off in the pipeline and manually + manipulate texture co-ords in the shader. + + Texture setting should also _maybe_ be reconciled with the "fixed" pipeline. But + the fixed pipeline uses indirect textures mapped to a buffer. That approach could + work for the programmable pipeline too, but I'm planning changes to the fixed pipeline + and the way it stores textures. So maybe things should be reconciled after that + work is done. 
+ */ + + for (size_t i = mRef->GetPassIndex(pass); i < mRef->GetPassIndex(pass) + mRef->fPassLengths[pass]; i++) { + plLayerInterface* layer = material->GetLayer(i); + if (!layer) { + continue; + } + + CheckTextureRef(layer); + + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + + if (!img) { + continue; + } + + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); + + if (!texRef->fTexture) { + continue; + } + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i - mRef->GetPassIndex(pass)); + } + } else { + //"Fixed" path + + + s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + + if (s.fBlendFlags & hsGMatState::kBlendInvertVtxAlpha) + fCurrentRenderPassUniforms->invVtxAlpha = true; + else + fCurrentRenderPassUniforms->invVtxAlpha = false; + + std::vector& spanLights = span.GetLightList(false); + + int numActivePiggyBacks = 0; + //FIXME: In the DX source, this check was done on the first layer. Does that mean the first layer of the material or the first layer of the pass? 
+ if( !(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive) ) + { + /// Tack lightmap onto last stage if we have one + numActivePiggyBacks = fActivePiggyBacks; + //if( numActivePiggyBacks > fMaxLayersAtOnce - fCurrNumLayers ) + // numActivePiggyBacks = fMaxLayersAtOnce - fCurrNumLayers; + + } + + plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered); + MTL::RenderPipelineState *pipelineState = pipeline->pipelineState; + if(fCurrentPipelineState != pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); + fCurrentPipelineState = pipelineState; + } + + mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, plMetalMaterialShaderRef::Passthrough, plMetalMaterialShaderRef::Passthrough); + } +#endif + if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) + { + fCurrentRenderPassUniforms->ambientCol = simd_float4(1.0f); + + fCurrentRenderPassUniforms->diffuseSrc = 1.0; + fCurrentRenderPassUniforms->ambientSrc = 1.0; + fCurrentRenderPassUniforms->emissiveSrc = 0.0; + fCurrentRenderPassUniforms->specularSrc = 1.0; + } + + render.RenderPrims(); + } + + /*HRESULT r; + + r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize ); + hsAssert( r == D3D_OK, "Error trying to set the stream source!" ); + plProfile_Inc(VertexChange); + + fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = IGetBufferD3DFormat(vRef->fFormat)); + + r = fD3DDevice->SetIndices( iRef->fD3DBuffer ); + hsAssert( r == D3D_OK, "Error trying to set the indices!" 
); + + plRenderTriListFunc render(fD3DDevice, iRef->fOffset, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength/3); + int j; + for( j = 0; j < material->GetNumLayers(); ) + { + int iCurrMat = j; + j = IHandleMaterial( material, iCurrMat, &span ); + if (j == -1) + break; + + ISetShaders(material->GetLayer(iCurrMat)->GetVertexShader(), material->GetLayer(iCurrMat)->GetPixelShader()); + + if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) + { + static D3DMATERIAL9 mat; + fD3DDevice->SetRenderState(D3DRS_AMBIENT, 0xffffffff); + + fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL ); + fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 ); + fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL ); + fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL ); + + fD3DDevice->SetMaterial( &mat ); + } + + render.RenderPrims(); + }*/ + +} + +bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) +{ + plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + + fCurrLayerIdx = mRef->GetPassIndex(pass); + //plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); + plLayerInterface *lay = material->GetLayer(mRef->GetPassIndex(pass)); + + ICalcLighting(mRef, lay, currSpan); + + hsGMatState s; + s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + + /* + If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer. + This can happen with some models like the fire marbles in the neighborhood that have some models + for physics only, and then can block other rendering in the Z buffer. + DX pipeline does this in ILoopOverLayers. 
+ */ + if( (s.fBlendFlags & hsGMatState::kBlendAlpha) + &&lay->GetOpacity() <= 0 + &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { + + return false; + } + + IHandleZMode(s); + IHandleBlendMode(s); + + if (s.fMiscFlags & hsGMatState::kMiscTwoSided) { + if(fCurrentCullMode != MTL::CullModeNone) { + fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); + fCurrentCullMode = MTL::CullModeNone; + } + } else { + ISetCullMode(); + } + + + //Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. + if(lay->GetVertexShader() && allowShaders) { + //pure shader path + plShader *vertexShader = lay->GetVertexShader(); + plShader *fragShader = lay->GetPixelShader(); + + fCurrLay = lay; + fCurrNumLayers = mRef->fPassLengths[pass]; + + ISetShaders(vRef, s, vertexShader, fragShader); + + //FIXME: Programmable pipeline does not implement the full feature set + /* + The programmable pipeline doesn't do things like set the texture transform matrices, + In practice, the transforms aren't set and used. Does it matter that the Metal + implementation doesn't implemention the full inputs the DX version gets? + + If it is implemented, the same checks the DX version does should be also implemented. + DX will set texture transforms, but then turn them off in the pipeline and manually + manipulate texture co-ords in the shader. + + Texture setting should also _maybe_ be reconciled with the "fixed" pipeline. But + the fixed pipeline uses indirect textures mapped to a buffer. That approach could + work for the programmable pipeline too, but I'm planning changes to the fixed pipeline + and the way it stores textures. So maybe things should be reconciled after that + work is done. 
+ */ + + for (size_t i = mRef->GetPassIndex(pass); i < mRef->GetPassIndex(pass) + mRef->fPassLengths[pass]; i++) { + plLayerInterface* layer = material->GetLayer(i); + if (!layer) { + return false; + } + + CheckTextureRef(layer); + + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + + if (!img) { + return false; + } + + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); + + if (!texRef->fTexture) { + return false; + } + + size_t idOffset = 0; + //Metal doesn't like mixing 2D and cubic textures. If this is a cubic texture, make sure it lands in the right ID range. + if(plCubicRenderTarget::ConvertNoRef( img )) { + idOffset = FragmentShaderArgumentAttributeCubicTextures; + } + + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i - mRef->GetPassIndex(pass) + idOffset); + } + } else { + //"Fixed" path + + lay = IPushOverBaseLayer(lay); + + s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + + if (s.fBlendFlags & hsGMatState::kBlendInvertVtxAlpha) + fCurrentRenderPassUniforms->invVtxAlpha = true; + else + fCurrentRenderPassUniforms->invVtxAlpha = false; + + std::vector& spanLights = currSpan->GetLightList(false); + + int numActivePiggyBacks = 0; + //FIXME: In the DX source, this check was done on the first layer. Does that mean the first layer of the material or the first layer of the pass? 
+ if( !(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive) ) + { + /// Tack lightmap onto last stage if we have one + numActivePiggyBacks = fActivePiggyBacks; + //if( numActivePiggyBacks > fMaxLayersAtOnce - fCurrNumLayers ) + // numActivePiggyBacks = fMaxLayersAtOnce - fCurrNumLayers; + + } + + plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered); + MTL::RenderPipelineState *pipelineState = pipeline->pipelineState; + if(fCurrentPipelineState != pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); + fCurrentPipelineState = pipelineState; + } + lay = IPopOverBaseLayer(lay); + + if(numActivePiggyBacks==0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) { + mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass); + } + + mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, + [&](plLayerInterface* layer, uint32_t index){ + if(index==0) { + layer = IPushOverBaseLayer(layer); + } + layer = IPushOverAllLayer(layer); + return layer; + }, + [&](plLayerInterface* layer, uint32_t index){ + layer = IPopOverAllLayer(layer); + if(index==0) + layer = IPopOverBaseLayer(layer); + return layer; + }); + } + + return true; +} + +// ISetPipeConsts ////////////////////////////////////////////////////////////////// +// A shader can request that the pipeline fill in certain constants that are indeterminate +// until the pipeline is about to render the object the shader is applied to. For example, +// the object's local to world. A single shader may be used on multiple objects with +// multiple local to world transforms. This ensures the pipeline will shove the proper +// local to world into the shader immediately before the render. 
+// See plShader.h for the list of available pipe constants. +// Note that the lighting pipe constants are NOT implemented. +void plMetalPipeline::ISetPipeConsts(plShader* shader) +{ + int n = shader->GetNumPipeConsts(); + int i; + for( i = 0; i < n; i++ ) + { + const plPipeConst& pc = shader->GetPipeConst(i); + switch( pc.fType ) + { + case plPipeConst::kFogSet: + { + float set[4]; + //FIXME: Fog broken in dynamic pipeline + //IGetVSFogSet(set); + //shader->SetFloat4(pc.fReg, set); + } + break; + case plPipeConst::kLayAmbient: + { + hsColorRGBA col = fCurrLay->GetAmbientColor(); + shader->SetColor(pc.fReg, col); + } + break; + case plPipeConst::kLayRuntime: + { + hsColorRGBA col = fCurrLay->GetRuntimeColor(); + col.a = fCurrLay->GetOpacity(); + shader->SetColor(pc.fReg, col); + } + break; + case plPipeConst::kLaySpecular: + { + hsColorRGBA col = fCurrLay->GetSpecularColor(); + shader->SetColor(pc.fReg, col); + } + break; + case plPipeConst::kTex3x4_0: + case plPipeConst::kTex3x4_1: + case plPipeConst::kTex3x4_2: + case plPipeConst::kTex3x4_3: + case plPipeConst::kTex3x4_4: + case plPipeConst::kTex3x4_5: + case plPipeConst::kTex3x4_6: + case plPipeConst::kTex3x4_7: + { + int stage = pc.fType - plPipeConst::kTex3x4_0; + + if( stage > fCurrNumLayers ) + { + // Ooops. This is bad, means the shader is expecting more layers than + // we actually have (or is just bogus). Assert and quietly continue. 
+ hsAssert(false, "Shader asking for higher stage transform than we have"); + continue; + } + const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform(); + + shader->SetMatrix34(pc.fReg, xfm); + } + break; + case plPipeConst::kTex2x4_0: + case plPipeConst::kTex2x4_1: + case plPipeConst::kTex2x4_2: + case plPipeConst::kTex2x4_3: + case plPipeConst::kTex2x4_4: + case plPipeConst::kTex2x4_5: + case plPipeConst::kTex2x4_6: + case plPipeConst::kTex2x4_7: + { + int stage = pc.fType - plPipeConst::kTex2x4_0; + + if( stage > fCurrNumLayers ) + { + // Ooops. This is bad, means the shader is expecting more layers than + // we actually have (or is just bogus). Assert and quietly continue. + hsAssert(false, "Shader asking for higher stage transform than we have"); + continue; + } + const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform(); + + shader->SetMatrix24(pc.fReg, xfm); + } + break; + case plPipeConst::kTex1x4_0: + case plPipeConst::kTex1x4_1: + case plPipeConst::kTex1x4_2: + case plPipeConst::kTex1x4_3: + case plPipeConst::kTex1x4_4: + case plPipeConst::kTex1x4_5: + case plPipeConst::kTex1x4_6: + case plPipeConst::kTex1x4_7: + { + int stage = pc.fType - plPipeConst::kTex1x4_0; + + if( stage > fCurrNumLayers ) + { + // Ooops. This is bad, means the shader is expecting more layers than + // we actually have (or is just bogus). Assert and quietly continue. 
+ hsAssert(false, "Shader asking for higher stage transform than we have"); + continue; + } + const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform(); + + shader->SetFloat4(pc.fReg, xfm.fMap[0]); + } + break; + case plPipeConst::kLocalToNDC: + { + hsMatrix44 cam2ndc = IGetCameraToNDC(); + hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); + + hsMatrix44 local2ndc = cam2ndc * world2cam * GetLocalToWorld(); + + shader->SetMatrix44(pc.fReg, local2ndc); + } + break; + + case plPipeConst::kCameraToNDC: + { + hsMatrix44 cam2ndc = IGetCameraToNDC(); + + shader->SetMatrix44(pc.fReg, cam2ndc); + } + break; + + case plPipeConst::kWorldToNDC: + { + hsMatrix44 cam2ndc = IGetCameraToNDC(); + hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); + + hsMatrix44 world2ndc = cam2ndc * world2cam; + + shader->SetMatrix44(pc.fReg, world2ndc); + } + break; + + case plPipeConst::kLocalToWorld: + shader->SetMatrix34(pc.fReg, GetLocalToWorld()); + break; + + case plPipeConst::kWorldToLocal: + shader->SetMatrix34(pc.fReg, GetWorldToLocal()); + break; + + case plPipeConst::kWorldToCamera: + { + hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); + + shader->SetMatrix34(pc.fReg, world2cam); + } + break; + + case plPipeConst::kCameraToWorld: + { + hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld(); + + shader->SetMatrix34(pc.fReg, cam2world); + } + break; + + case plPipeConst::kLocalToCamera: + { + hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); + + hsMatrix44 local2cam = world2cam * GetLocalToWorld(); + + shader->SetMatrix34(pc.fReg, local2cam); + } + break; + + case plPipeConst::kCameraToLocal: + { + hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld(); + + hsMatrix44 cam2local = GetWorldToLocal() * cam2world; + + shader->SetMatrix34(pc.fReg, cam2local); + } + break; + + case plPipeConst::kCamPosWorld: + { + shader->SetVectorW(pc.fReg, GetViewTransform().GetCameraToWorld().GetTranslate(), 1.f); + } + 
break; + + case plPipeConst::kCamPosLocal: + { + hsPoint3 localCam = GetWorldToLocal() * GetViewTransform().GetCameraToWorld().GetTranslate(); + + shader->SetVectorW(pc.fReg, localCam, 1.f); + } + break; + + case plPipeConst::kObjPosWorld: + { + shader->SetVectorW(pc.fReg, GetLocalToWorld().GetTranslate(), 1.f); + } + break; + + // UNIMPLEMENTED + case plPipeConst::kDirLight1: + case plPipeConst::kDirLight2: + case plPipeConst::kDirLight3: + case plPipeConst::kDirLight4: + case plPipeConst::kPointLight1: + case plPipeConst::kPointLight2: + case plPipeConst::kPointLight3: + case plPipeConst::kPointLight4: + break; + } + } +} + +// ISetShaders ///////////////////////////////////////////////////////////////////////////////////// +// Setup to render using the input vertex and pixel shader. Either or both may +// be nil, in which case the fixed function pipeline is indicated. +// Any Pipe Constants the non-FFP shader wants will be set here. +// Lastly, all constants will be set (as a block) for any non-FFP vertex or pixel shader. 
+bool plMetalPipeline::ISetShaders(const plMetalVertexBufferRef * vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader) +{ + hsAssert(vShader, "Can't handle programmable passes without vShader"); + hsAssert(pShader, "Can't handle programmable passes without pShader"); + plShaderID::ID vertexShaderID = vShader->GetDecl()->GetID(); + plShaderID::ID fragmentShaderID = pShader->GetDecl()->GetID(); + + plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, blendMode.fBlendFlags, 0, vertexShaderID, fragmentShaderID); + if(fCurrentPipelineState != pipeline->pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipeline->pipelineState); + fCurrentPipelineState = pipeline->pipelineState; + } + + if( vShader ) + { + hsAssert(vShader->IsVertexShader(), "Wrong type shader as vertex shader"); + ISetPipeConsts(vShader); + + plMetalVertexShader* vRef = (plMetalVertexShader*)vShader->GetDeviceRef(); + if( !vRef ) + { + vRef = new plMetalVertexShader(vShader); + hsRefCnt_SafeUnRef(vRef); + } + if( !vRef->IsLinked() ) + vRef->Link(&fVShaderRefList); + + vRef->ISetConstants(this); + } + + if( pShader ) + { + hsAssert(pShader->IsPixelShader(), "Wrong type shader as pixel shader"); + + ISetPipeConsts(pShader); + + plMetalFragmentShader* pRef = (plMetalFragmentShader*)pShader->GetDeviceRef(); + if( !pRef ) + { + pRef = new plMetalFragmentShader(pShader); + hsRefCnt_SafeUnRef(pRef); + } + if( !pRef->IsLinked() ) + pRef->Link(&fPShaderRefList); + + pRef->ISetConstants(this); + } + + /*if( vsHandle != fSettings.fCurrVertexShader ) + { + HRESULT hr = fD3DDevice->SetVertexShader(fSettings.fCurrVertexShader = vsHandle); + hsAssert(!FAILED(hr), "Error setting vertex shader"); + } + + if( psHandle != fSettings.fCurrPixelShader ) + { + HRESULT hr = fD3DDevice->SetPixelShader(fSettings.fCurrPixelShader = psHandle); + hsAssert(!FAILED(hr), "Error setting pixel shader"); + }*/ + + // Handle cull mode here, because current 
cullmode is dependent on + // the handedness of the LocalToCamera AND whether we are twosided. + ISetCullMode(); + + return true; +} + +bool plMetalPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase) +{ + if (!(spanBase->fTypeMask & plSpan::kVertexSpan)) + return false; + // If we arent' an trilist, we're toast. + if (!(spanBase->fTypeMask & plSpan::kIcicleSpan)) + return false; + + plIcicle* span = (plIcicle*)spanBase; + + plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx); + if (!vRef) + return true; + + plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx); + if (!iRef) + return true; + + // If our vertex buffer ref is volatile and the timestamp is off + // then it needs to be refilled + //MTL::PurgeableState bufferState = vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateNonVolatile); + if (vRef->Expired(fVtxRefTime)) { + IRefreshDynVertices(group, vRef); + fDevice.GetCurrentCommandBuffer()->addCompletedHandler( ^(MTL::CommandBuffer *buffer) { + //vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); + }); + } + + if (iRef->IsDirty()) { + fDevice.FillIndexBufferRef(iRef, group, span->fIBufferIdx); + iRef->SetRebuiltSinceUsed(true); + } + + return false; // No error +} + +bool plMetalPipeline::IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef) +{ + ptrdiff_t size = (group->GetVertBufferEnd(vRef->fIndex) - group->GetVertBufferStart(vRef->fIndex)) * vRef->fVertexSize; + if (!size) + return false; // No error, just nothing to do. 
+ + hsAssert(size > 0, "Bad start and end counts in a group"); + + if (!vRef->GetBuffer()) + { + hsAssert(size > 0, "Being asked to fill a buffer that doesn't exist yet?"); + } + + uint8_t* vData; + if (vRef->fData) + vData = vRef->fData; + else + vData = group->GetVertBufferData(vRef->fIndex) + group->GetVertBufferStart(vRef->fIndex) * vRef->fVertexSize; + + vRef->PrepareForWrite(); + + MTL::Buffer* vertexBuffer = vRef->GetBuffer(); + if(!vertexBuffer || vertexBuffer->length() < size) { + //Plasma will present different length buffers at different times + vertexBuffer = fDevice.fMetalDevice->newBuffer(vData, size, MTL::ResourceStorageModeManaged)->autorelease(); + if(vRef->Volatile()) { + fDevice.GetCurrentCommandBuffer()->addCompletedHandler(^(MTL::CommandBuffer* buffer){ + //vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); + }); + } + vRef->SetBuffer(vertexBuffer); + } else { + memcpy(vertexBuffer->contents(), + vData, + size); + vertexBuffer->didModifyRange(NS::Range(0, size)); + } + + vRef->fRefTime = fVtxRefTime; + vRef->SetDirty(false); + + return false; +} + +void plMetalPipeline::IHandleZMode(hsGMatState flags) +{ + //Metal is very particular that if there is no depth buffer we need to explictly disable z read and write + if(fDevice.fCurrentDepthFormat == MTL::PixelFormatInvalid) { + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); + return; + } + + MTL::DepthStencilState *newDepthState; + switch (flags.fZFlags & hsGMatState::kZMask) + { + case hsGMatState::kZClearZ: + //FIXME: Clear should actually clear the Z target + newDepthState = fDevice.fNoZReadStencilState; + break; + case hsGMatState::kZNoZRead: + newDepthState = fDevice.fNoZReadStencilState; + break; + case hsGMatState::kZNoZWrite: + newDepthState = fDevice.fNoZWriteStencilState; + break; + case hsGMatState::kZNoZRead | hsGMatState::kZClearZ: + newDepthState = fDevice.fNoZReadStencilState; + break; + case hsGMatState::kZNoZRead 
| hsGMatState::kZNoZWrite: + newDepthState = fDevice.fNoZReadOrWriteStencilState; + break; + case 0: + newDepthState = fDevice.fDefaultStencilState; + break; + case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite: + case hsGMatState::kZClearZ | hsGMatState::kZNoZWrite | hsGMatState::kZNoZRead: + hsAssert(false, "Illegal combination of Z Buffer modes (Clear but don't write)"); + break; + } + + if(fCurrentDepthStencilState != newDepthState) { + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(newDepthState); + fCurrentDepthStencilState = newDepthState; + } + + if (flags.fZFlags & hsGMatState::kZIncLayer) { + fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, -1.1, -1.1); + } else { + fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, 0.0, 0.0); + } +} + +void plMetalPipeline::IHandleBlendMode(hsGMatState flags) +{ + // No color, just writing out Z values. + if (flags.fBlendFlags & hsGMatState::kBlendNoColor) { + //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); + flags.fBlendFlags |= 0x80000000; + } else { + switch (flags.fBlendFlags & hsGMatState::kBlendMask) + { + // Detail is just a special case of alpha, handled in construction of the texture + // mip chain by making higher levels of the chain more transparent. + case hsGMatState::kBlendDetail: + case hsGMatState::kBlendAlpha: + if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalAlpha) { + if (flags.fBlendFlags & hsGMatState::kBlendAlphaPremultiplied) { + //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); + } else { + //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); + } + } else { + if (flags.fBlendFlags & hsGMatState::kBlendAlphaPremultiplied) { + //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); + } else { + //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); + } + } + break; + + // Multiply the final color onto the frame buffer. 
+ case hsGMatState::kBlendMult: + if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalColor) { + //printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); + } else { + //printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); + } + break; + + // Add final color to FB. + case hsGMatState::kBlendAdd: + //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); + break; + + // Multiply final color by FB color and add it into the FB. + case hsGMatState::kBlendMADD: + //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); + break; + + // Final color times final alpha, added into the FB. + case hsGMatState::kBlendAddColorTimesAlpha: + if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalAlpha) { + //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); + } else { + //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); + } + break; + + // Overwrite final color onto FB + case 0: + //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + break; + + default: + { + hsAssert(false, "Too many blend modes specified in material"); + plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); + if( lay ) + { + if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha ) + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha); + } + else + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); + } + } + } + break; + } + } +} + +void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan) +{ + //plProfile_Inc(MatLightState); + + if (IsDebugFlagSet(plPipeDbg::kFlagAllBright)) + { + fCurrentRenderPassUniforms->globalAmb = { 1.0, 1.0, 1.0, 1.0 }; + + fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->diffuseCol = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0, 1.0 }; + 
fCurrentRenderPassUniforms->specularCol = { 1.0, 1.0, 1.0, 1.0 }; + + fCurrentRenderPassUniforms->ambientSrc = 1.0; + fCurrentRenderPassUniforms->diffuseSrc = 1.0; + fCurrentRenderPassUniforms->emissiveSrc = 1.0; + fCurrentRenderPassUniforms->specularSrc = 1.0; + + return; + } + + hsGMatState state; + state.Composite(currLayer->GetState(), fMatOverOn, fMatOverOff); + + uint32_t mode = (currSpan != nullptr) ? (currSpan->fProps & plSpan::kLiteMask) : plSpan::kLiteMaterial; + + if (state.fMiscFlags & hsGMatState::kMiscBumpChans) { + mode = plSpan::kLiteMaterial; + state.fShadeFlags |= hsGMatState::kShadeNoShade | hsGMatState::kShadeWhite; + } + + /// Select one of our three lighting methods + switch (mode) { + case plSpan::kLiteMaterial: // Material shading + { + if (state.fShadeFlags & hsGMatState::kShadeWhite) { + fCurrentRenderPassUniforms->globalAmb = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0, 1.0 }; + } else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade)) { + fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 1.0 }; + } else { + hsColorRGBA amb = currLayer->GetPreshadeColor(); + fCurrentRenderPassUniforms->globalAmb = { amb.r, amb.g, amb.b, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { amb.r, amb.g, amb.b, 1.0 }; + } + + hsColorRGBA dif = currLayer->GetRuntimeColor(); + fCurrentRenderPassUniforms->diffuseCol = { dif.r, dif.g, dif.b, currLayer->GetOpacity() }; + + hsColorRGBA em = currLayer->GetAmbientColor(); + fCurrentRenderPassUniforms->emissiveCol = { em.r, em.g, em.b, 1.0 }; + + // Set specular properties + if (state.fShadeFlags & hsGMatState::kShadeSpecular) { + hsColorRGBA spec = currLayer->GetSpecularColor(); + fCurrentRenderPassUniforms->specularCol = { spec.r, spec.g, spec.b, 1.0 }; +#if 0 + mat.Power = currLayer->GetSpecularPower(); +#endif + } else { + fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0, 0.0 }; + } + + 
fCurrentRenderPassUniforms->diffuseSrc = 1.0; + fCurrentRenderPassUniforms->emissiveSrc = 1.0; + fCurrentRenderPassUniforms -> specularSrc = 1.0; + + if (state.fShadeFlags & hsGMatState::kShadeNoShade) { + fCurrentRenderPassUniforms->ambientSrc = 1.0; + } else { + fCurrentRenderPassUniforms->ambientSrc = 0.0; + } + fCurrLightingMethod = plSpan::kLiteMaterial; + + break; + } + + case plSpan::kLiteVtxPreshaded: // Vtx preshaded + { + fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->emissiveCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0, 0.0 }; + + fCurrentRenderPassUniforms->diffuseSrc = 0.0; + fCurrentRenderPassUniforms->ambientSrc = 1.0; + fCurrentRenderPassUniforms->specularSrc = 1.0; + + if (state.fShadeFlags & hsGMatState::kShadeEmissive) { + fCurrentRenderPassUniforms->emissiveSrc = 0.0; + } else { + fCurrentRenderPassUniforms->emissiveSrc = 1.0; + } + + fCurrLightingMethod = plSpan::kLiteVtxPreshaded; + break; + } + + case plSpan::kLiteVtxNonPreshaded: // Vtx non-preshaded + { + fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; + + hsColorRGBA em = currLayer->GetAmbientColor(); + fCurrentRenderPassUniforms->emissiveCol = { em.r, em.g, em.b, 1.0 }; + + // Set specular properties + if (state.fShadeFlags & hsGMatState::kShadeSpecular) { + hsColorRGBA spec = currLayer->GetSpecularColor(); + fCurrentRenderPassUniforms->specularCol = { spec.r, spec.g, spec.b, 1.0 }; +#if 0 + mat.Power = currLayer->GetSpecularPower(); +#endif + } else { + fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0, 0.0 }; + } + + hsColorRGBA amb = currLayer->GetPreshadeColor(); + fCurrentRenderPassUniforms->globalAmb = { amb.r, amb.g, amb.b, amb.a }; + + 
fCurrentRenderPassUniforms->ambientSrc = 0.0; + fCurrentRenderPassUniforms->diffuseSrc = 0.0; + fCurrentRenderPassUniforms->emissiveSrc = 1.0; + fCurrentRenderPassUniforms->specularSrc = 1.0; + + fCurrLightingMethod = plSpan::kLiteVtxNonPreshaded; + break; + } + } + // Piggy-back some temporary fog stuff on the lighting... + const plFogEnvironment* fog = (currSpan ? (currSpan->fFogEnvironment ? currSpan->fFogEnvironment : &fView.GetDefaultFog()) : nullptr); + + if (currLayer) + { + if ((currLayer->GetShadeFlags() & hsGMatState::kShadeReallyNoFog) && !(fMatOverOff.fShadeFlags & hsGMatState::kShadeReallyNoFog)) + fog = nil; + } + + uint8_t type = fog ? fog->GetType() : plFogEnvironment::kNoFog; + hsColorRGBA color; + + switch (type) { + case plFogEnvironment::kLinearFog: + { + float start, end; + fog->GetPipelineParams(&start, &end, &color); + + fCurrentRenderPassUniforms->fogExponential = 0; + fCurrentRenderPassUniforms->fogValues = {start, end}; + fCurrentRenderPassUniforms->fogColor = { color.r, color.g, color.b }; + break; + } + case plFogEnvironment::kExpFog: + case plFogEnvironment::kExp2Fog: + { + float density; + float power = (type == plFogEnvironment::kExp2Fog) ? 
2.0f : 1.0f; + fog->GetPipelineParams(&density, &color); + + fCurrentRenderPassUniforms->fogExponential = 1; + fCurrentRenderPassUniforms->fogValues = { power, density}; + fCurrentRenderPassUniforms->fogColor = { color.r, color.g, color.b }; + break; + } + default: + fCurrentRenderPassUniforms->fogExponential = 0; + fCurrentRenderPassUniforms->fogValues = { 0.0, 0.0 }; + fCurrentRenderPassUniforms->fogColor = { 0.0, 0.0, 0.0 }; + break; + } +} + +void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj) +{ + const size_t numLights = 8; + size_t i = 0; + int32_t startScale; + float threshhold; + float overHold = 0.3; + float scale; + static std::vector onLights; + onLights.clear(); + + if (!IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights) && + !(IsDebugFlagSet(plPipeDbg::kFlagNoApplyProjLights) && proj) && + !(IsDebugFlagSet(plPipeDbg::kFlagOnlyApplyProjLights) && !proj)) + { + std::vector& spanLights = span->GetLightList(proj); + + for (i = 0; i < spanLights.size() && i < numLights; i++) { + // If these are non-projected lights, go ahead and enable them. + if( !proj ) + { + IEnableLight(mRef, i, spanLights[i]); + } + onLights.emplace_back(spanLights[i]); + } + startScale = i; + + /// Attempt #2: Take some of the n strongest lights (below a given threshhold) and + /// fade them out to nothing as they get closer to the bottom. This way, they fade + /// out of existence instead of pop out. 
+ + if (i < spanLights.size() - 1 && i > 0) { + threshhold = span->GetLightStrength(i, proj); + i--; + overHold = threshhold * 1.5f; + + if (overHold > span->GetLightStrength(0, proj)) { + overHold = span->GetLightStrength(0, proj); + } + + for (; i > 0 && span->GetLightStrength(i, proj) < overHold; i--) { + scale = (overHold - span->GetLightStrength(i, proj)) / (overHold - threshhold); + + IScaleLight(mRef, i, (1 - scale) * span->GetLightScale(i, proj)); + } + startScale = i + 1; + } + + + /// Make sure those lights that aren't scaled....aren't + for (i = 0; i < startScale; i++) { + IScaleLight(mRef, i, span->GetLightScale(i, proj)); + } + } + + // For the projected lights, don't enable, just remember who they are. + if( proj ) + { + fProjAll.clear(); + fProjEach.clear(); + for( i = 0; i < onLights.size(); i++ ) + { + if( onLights[i]->OverAll() ) + fProjAll.emplace_back(onLights[i]); + else + fProjEach.emplace_back(onLights[i]); + } + onLights.clear(); + } + + for (; i < numLights; i++) { + IDisableLight(mRef, i); + } +} + +void plMetalPipeline::IEnableLight(plMetalMaterialShaderRef* mRef, size_t i, plLightInfo* light) +{ + hsColorRGBA amb = light->GetAmbient(); + fCurrentRenderPassUniforms->lampSources[i].ambient = { amb.r, amb.g, amb.b, amb.a }; + + hsColorRGBA diff = light->GetDiffuse(); + fCurrentRenderPassUniforms->lampSources[i].diffuse = { diff.r, diff.g, diff.b, diff.a }; + + hsColorRGBA spec = light->GetSpecular(); + fCurrentRenderPassUniforms->lampSources[i].specular = { spec.r, spec.g, spec.b, spec.a }; + + plDirectionalLightInfo* dirLight = nullptr; + plOmniLightInfo* omniLight = nullptr; + plSpotLightInfo* spotLight = nullptr; + + if ((dirLight = plDirectionalLightInfo::ConvertNoRef(light)) != nullptr) + { + hsVector3 lightDir = dirLight->GetWorldDirection(); + fCurrentRenderPassUniforms->lampSources[i].position = { lightDir.fX, lightDir.fY, lightDir.fZ, 0.0 }; + fCurrentRenderPassUniforms->lampSources[i].direction = { lightDir.fX, lightDir.fY, 
lightDir.fZ }; + + fCurrentRenderPassUniforms->lampSources[i].constAtten = 1.0f; + fCurrentRenderPassUniforms->lampSources[i].linAtten = 0.0f; + fCurrentRenderPassUniforms->lampSources[i].quadAtten = 0.0f; + } + else if ((omniLight = plOmniLightInfo::ConvertNoRef(light)) != nullptr) + { + hsPoint3 pos = omniLight->GetWorldPosition(); + fCurrentRenderPassUniforms->lampSources[i].position = { pos.fX, pos.fY, pos.fZ, 1.0 }; + + // TODO: Maximum Range + + fCurrentRenderPassUniforms->lampSources[i].constAtten = omniLight->GetConstantAttenuation(); + fCurrentRenderPassUniforms->lampSources[i].linAtten = omniLight->GetLinearAttenuation(); + fCurrentRenderPassUniforms->lampSources[i].quadAtten = omniLight->GetQuadraticAttenuation(); + + if (!omniLight->GetProjection() && (spotLight = plSpotLightInfo::ConvertNoRef(omniLight)) != nullptr) { + hsVector3 lightDir = spotLight->GetWorldDirection(); + fCurrentRenderPassUniforms->lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; + + float falloff = spotLight->GetFalloff(); + float theta = cosf(spotLight->GetSpotInner()); + float phi = cosf(spotLight->GetProjection() ? 
hsConstants::half_pi : spotLight->GetSpotOuter()); + + fCurrentRenderPassUniforms->lampSources[i].spotProps = { falloff, theta, phi }; + } else { + fCurrentRenderPassUniforms->lampSources[i].spotProps = { 0.0, 0.0, 0.0 }; + } + } + else { + IDisableLight(mRef, i); + } +} + +void plMetalPipeline::IDisableLight(plMetalMaterialShaderRef* mRef, size_t i) +{ + fCurrentRenderPassUniforms->lampSources[i].position = { 0.0f, 0.0f, 0.0f, 0.0f }; + fCurrentRenderPassUniforms->lampSources[i].ambient = { 0.0f, 0.0f, 0.0f, 0.0f }; + fCurrentRenderPassUniforms->lampSources[i].diffuse = { 0.0f, 0.0f, 0.0f, 0.0f }; + fCurrentRenderPassUniforms->lampSources[i].specular = { 0.0f, 0.0f, 0.0f, 0.0f }; + fCurrentRenderPassUniforms->lampSources[i].constAtten = { 1.0f }; + fCurrentRenderPassUniforms->lampSources[i].linAtten = { 0.0f }; + fCurrentRenderPassUniforms->lampSources[i].quadAtten = { 0.0f }; + fCurrentRenderPassUniforms->lampSources[i].scale = { 0.0f }; +} + +void plMetalPipeline::IScaleLight(plMetalMaterialShaderRef* mRef, size_t i, float scale) +{ + scale = int(scale * 1.e1f) * 1.e-1f; + fCurrentRenderPassUniforms->lampSources[i].scale = scale; +} + +void plMetalPipeline::IDrawPlate(plPlate* plate) +{ + if(!plate->IsVisible()) { + return; + } + hsGMaterial* material = plate->GetMaterial(); + + plLayerInterface* lay = material->GetLayer(0); + hsGMatState s; + s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + + IHandleZMode(s); + IHandleBlendMode(s); + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); + fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState; + + //column major layout + simd_float4x4 projMat = matrix_identity_float4x4; + //projMat.columns[2][3] = 1.0f; + //projMat.columns[3][1] = -0.5f; + projMat.columns[3][2] = 0.0f; + projMat.columns[1][1] = 1.0f; + + /// Set up the transform directly + fDevice.SetLocalToWorldMatrix(plate->GetTransform(), false); + + IPushPiggyBacks(material); + + // First, do we 
have a device ref at this index? + plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + + if (mRef == nullptr) { + mRef = new plMetalMaterialShaderRef(material, this); + material->SetDeviceRef(mRef); + } + + if (!mRef->IsLinked()) { + mRef->Link(&fMatRefList); + } + + fDevice.SetLocalToWorldMatrix(plate->GetTransform()); + + plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; + + MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fDevice.GetCurrentDrawable()->texture()); + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); + + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState); + fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); + + int uniformSize = sizeof(VertexUniforms); + VertexUniforms uniforms; + uniforms.projectionMatrix = projMat; + matrix_float4x4 modelMatrix; + uniforms.worldToCameraMatrix = modelMatrix; + uniforms.uvTransforms[0].flags = 0; + uniforms.uvTransforms[0].UVWSrc = 0; + uniforms.numUVSrcs = 1; + //uniforms.worldToLocalMatrix = fDevice.fMatrixW2L; + + //flip world to camera, it's upside down + matrix_float4x4 flip = matrix_identity_float4x4; + flip.columns[1][1] = -1.0f; + + + //uniforms.worldToCameraMatrix = + //uniforms.cameraToWorldMatrix = fDevice.fMatrixC2W; + uniforms.localToWorldMatrix = matrix_multiply(flip, fDevice.fMatrixL2W); + + mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), &uniforms, 0); + //FIXME: Hacking the old texture drawing into the plate path + mRef->prepareTextures(fDevice.CurrentRenderCommandEncoder(), 0); + + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), BufferIndexState); + + 
pm->encodeVertexBuffer(fDevice.CurrentRenderCommandEncoder()); + + IPopPiggyBacks(); +} + +//Push and pop light sources +//The DX version would just keep a giant pool of lights +//that could be claimed by different parts of the pipeline. +//In Metal, when a part of the pipeline wants to own lights +//we'll just let them push/pop the current state. +void plMetalPipeline::PushCurrentLightSources() +{ + plMetalShaderLightSource *lightSources = new plMetalShaderLightSource[8](); + memcpy(lightSources, fCurrentRenderPassUniforms->lampSources, sizeof(plMetalShaderLightSource[8])); + fLightSourceStack.emplace_back(lightSources); +} + +void plMetalPipeline::PopCurrentLightSources() +{ + hsAssert(fLightSourceStack.size() > 0, "Asked to pop light sources but none on stack"); + plMetalShaderLightSource *lightSources = fLightSourceStack.back(); + fLightSourceStack.pop_back(); + memcpy(fCurrentRenderPassUniforms->lampSources, lightSources, sizeof(plMetalShaderLightSource[8])); + delete lightSources; +} + +// Special effects ///////////////////////////////////////////////////////////// + +// IPushOverBaseLayer ///////////////////////////////////////////////////////// +// Sets fOverBaseLayer (if any) as a wrapper on top of input layer. +// This allows the OverBaseLayer to intercept and modify queries of +// the real current layer's properties (e.g. color or state). +// fOverBaseLayer is set to only get applied to the base layer during +// multitexturing. +// Must be matched with call to IPopOverBaseLayer. 
+plLayerInterface* plMetalPipeline::IPushOverBaseLayer(plLayerInterface* li)
+{
+    // Nothing to push.
+    if (!li) {
+        return nil;
+    }
+
+    fOverLayerStack.push_back(li);
+
+    if (fOverBaseLayer) {
+        // There is already a wrapper in place: chain the new layer onto it
+        // and re-evaluate the combined layer for the current time and frame.
+        fForceMatHandle = true;
+        fOverBaseLayer = fOverBaseLayer->Attach(li);
+        fOverBaseLayer->Eval(fTime, fFrame, 0);
+    } else {
+        // First wrapper: it simply becomes the over-base layer.
+        fOverBaseLayer = li;
+    }
+
+    return fOverBaseLayer;
+}
+
+// IPopOverBaseLayer /////////////////////////////////////////////////////////
+// Takes the most recently pushed layer off fOverLayerStack, detaches it from
+// fOverBaseLayer and returns it. A null `li` is a no-op that returns nil.
+// Pair every call with an earlier IPushOverBaseLayer.
+plLayerInterface* plMetalPipeline::IPopOverBaseLayer(plLayerInterface* li)
+{
+    if (!li) {
+        return nil;
+    }
+
+    fForceMatHandle = true;
+
+    plLayerInterface* top = fOverLayerStack.back();
+    fOverLayerStack.pop_back();
+
+    fOverBaseLayer = fOverBaseLayer->Detach(top);
+    return top;
+}
+
+// IPushOverAllLayer ///////////////////////////////////////////////////
+// Installs `li` as (part of) fOverAllLayer, the wrapper applied to each layer
+// during multitexturing, and returns the resulting wrapper.
+// A null `li` is a no-op that returns nil.
+// Pair every call with a later IPopOverAllLayer.
+plLayerInterface* plMetalPipeline::IPushOverAllLayer(plLayerInterface* li)
+{
+    if (!li) {
+        return nil;
+    }
+
+    fOverLayerStack.push_back(li);
+
+    if (fOverAllLayer) {
+        // Chain onto the existing wrapper; the material must be re-handled.
+        fForceMatHandle = true;
+        fOverAllLayer = fOverAllLayer->Attach(li);
+    } else {
+        fOverAllLayer = li;
+    }
+
+    // Either way the resulting wrapper is evaluated for the current time and frame.
+    fOverAllLayer->Eval(fTime, fFrame, 0);
+    return fOverAllLayer;
+}
+
+// IPopOverAllLayer //////////////////////////////////////////////////
+// Remove fOverAllLayer as wrapper on top of input layer.
+// Should match calls to IPushOverAllLayer.
+plLayerInterface* plMetalPipeline::IPopOverAllLayer(plLayerInterface* li)
+{
+    if (!li) {
+        return nil;
+    }
+
+    fForceMatHandle = true;
+
+    plLayerInterface* top = fOverLayerStack.back();
+    fOverLayerStack.pop_back();
+
+    fOverAllLayer = fOverAllLayer->Detach(top);
+    return top;
+}
+
+// IPushProjPiggyBack //////////////////////////////////////////////////
+// Appends a projected texture layer to the piggy back stack.
+// Does nothing while the view has kRenderNoPiggyBacks set.
+void plMetalPipeline::IPushProjPiggyBack(plLayerInterface* li)
+{
+    const bool piggyBacksDisabled = fView.fRenderState & plPipeline::kRenderNoPiggyBacks;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    fPiggyBackStack.push_back(li);
+    fActivePiggyBacks = fPiggyBackStack.size() - fMatPiggyBacks;
+    fForceMatHandle = true;
+}
+
+// IPopProjPiggyBacks /////////////////////////////////////////////////
+// Drops every projected piggy back, shrinking the stack back to the
+// fMatPiggyBacks entries pushed for the material.
+// Does nothing while the view has kRenderNoPiggyBacks set.
+void plMetalPipeline::IPopProjPiggyBacks()
+{
+    const bool piggyBacksDisabled = fView.fRenderState & plPipeline::kRenderNoPiggyBacks;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    fPiggyBackStack.resize(fMatPiggyBacks);
+    ISetNumActivePiggyBacks();
+    fForceMatHandle = true;
+}
+
+// IPushPiggyBacks ////////////////////////////////////////////////////
+// Pushes the material's piggy back layers (presumed to be light maps, since
+// that is all they are used for) and counts them in fMatPiggyBacks.
+// Null entries are skipped, as are light map layers while the
+// kFlagNoLightmaps debug flag is set.
+// Matched with IPopPiggyBacks
+void plMetalPipeline::IPushPiggyBacks(hsGMaterial* mat)
+{
+    hsAssert(!fMatPiggyBacks, "Push/Pop Piggy mismatch");
+
+    const bool piggyBacksDisabled = fView.fRenderState & plPipeline::kRenderNoPiggyBacks;
+    if (piggyBacksDisabled) {
+        return;
+    }
+
+    for (int idx = 0; idx < mat->GetNumPiggyBacks(); idx++) {
+        plLayerInterface* piggy = mat->GetPiggyBack(idx);
+        if (!piggy) {
+            continue;
+        }
+
+        const bool isLightMap = (piggy->GetMiscFlags() & hsGMatState::kMiscLightMap) != 0;
+        if (isLightMap && IsDebugFlagSet(plPipeDbg::kFlagNoLightmaps)) {
+            continue;
+        }
+
+        fPiggyBackStack.push_back(piggy);
+        fMatPiggyBacks++;
+    }
+
+    ISetNumActivePiggyBacks();
+    fForceMatHandle = true;
+}
+
+// IPopPiggyBacks ///////////////////////////////////////////////////////
+// Pop any current piggy backs set from IPushPiggyBacks.
+// Matches IPushPiggyBacks.
+void plMetalPipeline::IPopPiggyBacks()
+{
+    if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks )
+        return;
+
+    // The material's piggy backs are the last fMatPiggyBacks entries on the stack.
+    fPiggyBackStack.resize(fPiggyBackStack.size() - fMatPiggyBacks);
+    fMatPiggyBacks = 0;
+
+    ISetNumActivePiggyBacks();
+    fForceMatHandle = true;
+}
+
+// PiggyBacks - used in techniques like projective lighting.
+// PiggyBacks are layers appended to each drawprimitive pass.
+// For example, if a material has 3 layers which will be drawn
+// in 2 passes,
+// pass0: layer0+layer1
+// pass1: layer2
+// Then if a piggyback layer layerPB is active, the actual rendering would be
+// pass0: layer0+layer1+layerPB
+// pass1: layer2 + layerPB
+
+// ISetNumActivePiggyBacks /////////////////////////////////////////////
+// Calculate the number of active piggy backs: the size of the piggy back
+// stack, capped at fMaxPiggyBacks. Stores it in fActivePiggyBacks and returns it.
+int plMetalPipeline::ISetNumActivePiggyBacks()
+{
+    return fActivePiggyBacks = std::min(static_cast<size_t>(fMaxPiggyBacks), fPiggyBackStack.size());
+}
+
+// IPreprocessAvatarTextures ///////////////////////////////////////////
+// Per-frame avatar clothing pass. Rendering into a render target is not
+// implemented yet (see the HACK below): each outfit's target layer is simply
+// pointed at its base texture. Afterwards this frame's outfit list becomes
+// the "previous" list.
+void plMetalPipeline::IPreprocessAvatarTextures()
+{
+    plProfile_Set(AvRTPoolUsed, fClothingOutfits.size());
+    plProfile_Set(AvRTPoolCount, fAvRTPool.size());
+    plProfile_Set(AvRTPoolRes, fAvRTWidth);
+    plProfile_Set(AvRTShrinkTime, uint32_t(hsTimer::GetSysSeconds() - fAvRTShrinkValidSince));
+
+    // Frees anyone used last frame that we don't need this frame
+    IClearClothingOutfits(&fPrevClothingOutfits);
+
+    if (fClothingOutfits.size() == 0)
+        return;
+
+    // Only referenced by the commented-out GL calls below.
+    static float kIdentityMatrix[16] = {
+        1.0f, 0.0f, 0.0f, 0.0f,
+        0.0f, 1.0f, 0.0f, 0.0f,
+        0.0f, 0.0f, 1.0f, 0.0f,
+        0.0f, 0.0f, 0.0f, 1.0f
+    };
+
+    //glUniformMatrix4fv(mRef->uMatrixProj, 1, GL_TRUE, kIdentityMatrix);
+    //glUniformMatrix4fv(mRef->uMatrixW2C, 1, GL_TRUE, kIdentityMatrix);
+    //glUniformMatrix4fv(mRef->uMatrixC2W, 1, GL_TRUE, kIdentityMatrix);
+    //glUniformMatrix4fv(mRef->uMatrixL2W, 1, GL_TRUE, kIdentityMatrix);
+
+    for (size_t oIdx = 0; oIdx < fClothingOutfits.size(); oIdx++) {
+        plClothingOutfit* co = fClothingOutfits[oIdx];
+        if (co->fBase == nullptr || co->fBase->fBaseTexture == nullptr)
+            continue;
+
+#if 0
+        plRenderTarget* rt = plRenderTarget::ConvertNoRef(co->fTargetLayer->GetTexture());
+        if (rt != nullptr && co->fDirtyItems.Empty())
+            // we've still got our valid RT from last frame and we have nothing to do.
+            continue;
+
+        if (rt == nullptr) {
+            rt = IGetNextAvRT();
+            co->fTargetLayer->SetTexture(rt);
+        }
+#endif
+
+        //PushRenderTarget(rt);
+
+        // HACK HACK HACK
+        co->fTargetLayer->SetTexture(co->fBase->fBaseTexture);
+
+        // TODO: Actually render to the render target
+
+        //PopRenderTarget();
+        //co->fDirtyItems.Clear();
+    }
+
+    fView.fXformResetFlags = fView.kResetAll;
+
+    fClothingOutfits.swap(fPrevClothingOutfits);
+}
+
+// FindFragFunction ////////////////////////////////////////////////////
+// Looks up "pipelineFragmentShader" in the device's default library,
+// specialised for one UV source and one layer, and stores the result in
+// fFragFunction.
+void plMetalPipeline::FindFragFunction() {
+    MTL::Library *library = fDevice.fMetalDevice->newDefaultLibrary();
+
+    NS::Error *error = nullptr;
+
+    MTL::FunctionConstantValues *functionContents = MTL::FunctionConstantValues::alloc()->init();
+    // MTL::DataTypeUShort describes a 16-bit unsigned value, so pass exactly that.
+    const uint16_t numUVs = 1;
+    functionContents->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs);
+    functionContents->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumLayers);
+
+    MTL::Function *fragFunction = library->newFunction(
+        NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding),
+        functionContents,
+        &error
+    );
+    fFragFunction = fragFunction;
+
+    // alloc()->init() returned an owning reference; release it now that the
+    // specialised function has been created, otherwise it leaks on every call.
+    functionContents->release();
+    library->release();
+}
+
+/*plPipeline* plPipelineCreate::ICreateMetalPipeline(hsWindowHndl disp, hsWindowHndl hWnd, const hsG3DDeviceModeRecord* devMode)
+{
+    plMetalPipeline* pipe = new plMetalPipeline(disp, hWnd, devMode);
+    return pipe;
+}*/
+
+// IClearShadowSlaves ///////////////////////////////////////////////////////////////////////////
+// At EndRender(), we need to clear our list of shadow slaves. They are only valid for one frame.
+void plMetalPipeline::IClearShadowSlaves() +{ + int i; + for( i = 0; i < fShadows.size(); i++ ) + { + const plShadowCaster* caster = fShadows[i]->fCaster; + caster->GetKey()->UnRefObject(); + } + fShadows.clear(); +} + +// Create all our video memory consuming D3D objects. +bool plMetalPipeline::ICreateDynDeviceObjects() +{ + // Front/Back/Depth buffers + //if( ICreateNormalSurfaces() ) + // return true; + + // RenderTarget pools are shared for our shadow generation algorithm. + // Different sizes for different resolutions. + IMakeRenderTargetPools(); + + // Create device-specific stuff + fDebugTextMgr = new plDebugTextManager(); + if( fDebugTextMgr == nil ) + return true; + + // Vertex buffers, index buffers, textures, etc. + LoadResources(); + + return false; +} + +// IReleaseDynDeviceObjects ////////////////////////////////////////////// +// Make sure we aren't holding on to anything, and release all of +// the D3D resources that we normally hang on to forever. Meaning things +// that persist through unloading one age and loading the next. +void plMetalPipeline::IReleaseDynDeviceObjects() +{ + // We should do this earlier, but the textFont objects don't remove + // themselves from their parent objects yet + delete fDebugTextMgr; + fDebugTextMgr = nil; + + while( fRenderTargetRefList ) + { + plMetalRenderTargetRef* rtRef = fRenderTargetRefList; + rtRef->Release(); + rtRef->Unlink(); + } + + //FIXME: Materials wouldn't normally be dynamic resources. But... the buffers can reference render targets which we are swapping. Might be able to fix this if shader references aren't encoded into the material. Piggybacks already aren't included, and it's complicating the fragment shader. So it might be better just to directly load texture references. 
+ while( fMatRefList ) + { + plMetalMaterialShaderRef* matRef = fMatRefList; + matRef->Release(); + matRef->Unlink(); + } + + // The shared dynamic vertex buffers used by things like objects skinned on CPU, or + // particle systems. + //IReleaseDynamicBuffers(); + //IReleaseAvRTPool(); + IReleaseRenderTargetPools(); + +} + +// IReleaseRenderTargetPools ////////////////////////////////////////////////// +// Free up all resources assosiated with our pools of rendertargets of varying +// sizes. Primary user of these pools is the shadow generation. +void plMetalPipeline::IReleaseRenderTargetPools() +{ + int i; + + for( i = 0; i < fRenderTargetPool512.size(); i++ ) + { + delete fRenderTargetPool512[i]; + fRenderTargetPool512[i] = nil; + } + fRenderTargetPool512.clear(); + + for( i = 0; i < fRenderTargetPool256.size(); i++ ) + { + delete fRenderTargetPool256[i]; + fRenderTargetPool256[i] = nil; + } + fRenderTargetPool256.clear(); + + for( i = 0; i < fRenderTargetPool128.size(); i++ ) + { + delete fRenderTargetPool128[i]; + fRenderTargetPool128[i] = nil; + } + fRenderTargetPool128.clear(); + + for( i = 0; i < fRenderTargetPool64.size(); i++ ) + { + delete fRenderTargetPool64[i]; + fRenderTargetPool64[i] = nil; + } + fRenderTargetPool64.clear(); + + for( i = 0; i < fRenderTargetPool32.size(); i++ ) + { + delete fRenderTargetPool32[i]; + fRenderTargetPool32[i] = nil; + } + fRenderTargetPool32.clear(); + + for( i = 0; i < kMaxRenderTargetNext; i++ ) + { + fRenderTargetNext[i] = 0; + //fBlurScratchRTs[i] = nil; + //fBlurDestRTs[i] = nil; + } + +#ifdef MF_ENABLE_HACKOFF + hackOffscreens.Reset(); +#endif // MF_ENABLE_HACKOFF +} + +/////////////////////////////////////////////////////////////////////////////// +//// ShadowSection +//// Shadow specific internal functions +/////////////////////////////////////////////////////////////////////////////// +// See plGLight/plShadowMaster.cpp for more notes. 
+ + + +float blurScale = -1.f; +static const int kL2NumSamples = 3; // Log2(4) + +// IPrepShadowCaster //////////////////////////////////////////////////////////////////////// +// Make sure all the geometry in this shadow caster is ready to be rendered. +// Keep in mind the single shadow caster may be multiple spans possibly in +// multiple drawables. +// The tricky part here is that we need to prep each drawable involved, +// but only prep it once. Say the caster is composed of: +// drawableA, span0 +// drawableA, span1 +// drawableB, span0 +// Then we need to call plDrawable::PrepForRender() ONCE on drawableA, +// and once on drawableB. Further, we need to do any necessary CPU +// skinning with ISofwareVertexBlend(drawableA, visList={0,1}) and +// ISofwareVertexBlend(drawableB, visList={1}). +bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) +{ + static hsBitVector done; + done.Clear(); + const std::vector& castSpans = caster->Spans(); + + int i; + for( i = 0; i < castSpans.size(); i++ ) + { + if( !done.IsBitSet(i) ) + { + // We haven't already done this castSpan + + plDrawableSpans* drawable = castSpans[i].fDraw; + + // Start a visList with this index. + static std::vector visList; + visList.clear(); + visList.push_back((int16_t)(castSpans[i].fIndex)); + + // We're about to have done this castSpan. + done.SetBit(i); + + // Look forward through castSpans for any other spans + // with the same drawable, and add them to visList. + // We'll handle all the spans from this drawable at once. + int j; + for( j = i+1; j < castSpans.size(); j++ ) + { + if( !done.IsBitSet(j) && (castSpans[j].fDraw == drawable) ) + { + // Add to list + visList.push_back((int16_t)(castSpans[j].fIndex)); + + // We're about to have done this castSpan. + done.SetBit(j); + } + } + // That's all, prep the drawable. + drawable->PrepForRender( this ); + + // Do any software skinning. 
+ //if( !ISoftwareVertexBlend(drawable, visList) ) + // return false; + } + } + + return true; +} + +// IRenderShadowCaster //////////////////////////////////////////////// +// Render the shadow caster into the slave's render target, creating a shadow map. +bool plMetalPipeline::IRenderShadowCaster(plShadowSlave* slave) +{ + const plShadowCaster* caster = slave->fCaster; + + // Setup to render into the slave's render target. + if( !IPushShadowCastState(slave) ) + return false; + + // Get the shadow caster ready to render. + if( !IPrepShadowCaster(slave->fCaster) ) + return false; + + // for each shadowCaster.fSpans + int iSpan; + for( iSpan = 0; iSpan < caster->Spans().size(); iSpan++ ) + { + plDrawableSpans* dr = caster->Spans()[iSpan].fDraw; + const plSpan* sp = caster->Spans()[iSpan].fSpan; + uint32_t spIdx = caster->Spans()[iSpan].fIndex; + + hsAssert(sp->fTypeMask & plSpan::kIcicleSpan, "Shadow casting from non-trimeshes not currently supported"); + + // render shadowcaster.fSpans[i] to rendertarget + if( !(sp->fProps & plSpan::kPropNoShadowCast) ) + IRenderShadowCasterSpan(slave, dr, *(const plIcicle*)sp); + + // Keep track of which shadow slaves this span was rendered into. + // If self-shadowing is off, we use that to determine not to + // project the shadow map onto its source geometry. + sp->SetShadowBit(slave->fIndex); //index set in SubmitShadowSlave + } + + // Debug only. + if( blurScale >= 0.f ) + slave->fBlurScale = blurScale; + + // If this shadow requests being blurred, do it. + //TODO: Shadow blurring + //if( slave->fBlurScale > 0.f ) + //IBlurShadowMap(slave); + + // Finished up, restore previous state. + IPopShadowCastState(slave); + +#if MCN_BOUNDS_SPANS + if (IsDebugFlagSet(plPipeDbg::kFlagShowShadowBounds)) + { + /// Add a span to our boundsIce to show this + IAddBoundsSpan(fBoundsSpans, &slave->fWorldBounds); + } +#endif // MCN_BOUNDS_SPANS + + return true; +} + +// We have a (possibly empty) list of shadows submitted for this frame. 
+// At BeginRender, we need to accomplish:
+// Find render targets for each shadow request of the requested size.
+// Render the associated spans into the render targets. Something like the following:
+void plMetalPipeline::IPreprocessShadows()
+{
+    plProfile_BeginTiming(PrepShadows);
+
+    // Mark our shared resources as free to be used.
+    IResetRenderTargetPools();
+
+    // Some board (possibly the Parhelia) freaked if anistropic filtering
+    // was enabled when rendering to a render target. We never need it for
+    // shadow maps, and it is slower, so we just kill it here.
+    //ISetAnisotropy(false);
+
+    // Generate a shadow map for each submitted shadow slave.
+    // Shadow slave corresponds to one shadow caster paired
+    // with one shadow light that affects it. So a single caster
+    // may be in multiple slaves (from different lights), or a
+    // single light may be in different slaves (affecting different
+    // casters). The overall number is low in spite of the possible
+    // permutation explosion, because a slave is only generated
+    // for a caster being affected (in range etc.) by a light.
+    size_t slaveIdx = 0;
+    while (slaveIdx < fShadows.size()) {
+        if (IRenderShadowCaster(fShadows[slaveIdx])) {
+            ++slaveIdx;
+        } else {
+            // Any trouble, remove it from the list for this frame. The next
+            // slave slides into this index, so the index is not advanced.
+            fShadows.erase(fShadows.begin() + slaveIdx);
+        }
+    }
+
+    // Restore
+    //ISetAnisotropy(true);
+
+    plProfile_EndTiming(PrepShadows);
+}
+
+// IGetULutTextureRef ///////////////////////////////////////////////////////////
+// The ULut just translates a U coordinate in range [0..1] into
+// color and alpha of U * 255.9f. We just have the one we keep
+// lying around.
+plMetalTextureRef* plMetalPipeline::IGetULutTextureRef()
+{
+    const int width = 256;
+    const int height = 1;
+    if( !fULutTextureRef )
+    {
+        // Staging copy of the ramp. newBuffer(pointer, length, options) copies
+        // these bytes into the Metal buffer, so a stack array is enough and
+        // nothing needs to be freed afterwards.
+        uint32_t tData[width * height];
+
+        uint32_t* pData = tData;
+        for( int j = 0; j < height; j++ )
+        {
+            // Unsigned on purpose: shifting a signed int left by 24 overflows for i >= 128.
+            for( uint32_t i = 0; i < uint32_t(width); i++ )
+            {
+                *pData = (i << 24)
+                    | (i << 16)
+                    | (i << 8)
+                    | (i << 0);
+                pData++;
+            }
+        }
+
+        MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Uint, width, height, false);
+        textureDescriptor->setResourceOptions(MTL::ResourceStorageModeManaged | MTL::CPUCacheModeWriteCombined);
+        MTL::Buffer* buffer = fDevice.fMetalDevice->newBuffer(tData, sizeof(tData), MTL::ResourceStorageModeManaged | MTL::CPUCacheModeWriteCombined);
+        buffer->didModifyRange(NS::Range::Make(0, buffer->length()));
+        MTL::Texture* texture = buffer->newTexture(textureDescriptor, 0, width * 4);
+        plMetalTextureRef* ref = new plMetalTextureRef();
+        ref->fTexture = texture;
+
+        ref->Link(&fTextureRefList);
+
+        fULutTextureRef = ref;
+
+        buffer->release();
+    }
+    return fULutTextureRef;
+}
+
+// IPushShadowCastState ////////////////////////////////////////////////////////////////////////////////
+// Push all the state necessary to start rendering this shadow map, but independent of the
+// actual shadow caster to be rendered into the map.
+// Returns false, with nothing pushed, if no render target is available or the
+// slave cannot set up its view transform.
+bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave)
+{
+    plRenderTarget* renderTarg = IFindRenderTarget(slave->fWidth, slave->fHeight, slave->fView.GetOrthogonal());
+    if( !renderTarg )
+        return false;
+
+    // Let the slave setup the transforms, viewport, etc. necessary to render it's shadow
+    // map. This just goes into a plViewTransform, we translate that into D3D state ourselves below.
+    if (!slave->SetupViewTransform(this))
+        return false;
+
+    // Set texture to U_LUT
+    plMetalTextureRef* ref = IGetULutTextureRef();
+    fCurrentRenderPassUniforms->specularSrc = 0.0;
+
+    //if( !ref->fTexture )
+    //{
+    //    if( ref->fData )
+    //        IReloadTexture( ref );
+    //}
+    //fDevice.SetRenderTarget(ref->fTexture);
+
+    // Push the shadow slave's view transform as our current render state.
+    fViewStack.push(fView);
+    fView.SetMaxCullNodes(0);
+    SetViewTransform(slave->fView);
+    IProjectionMatrixToDevice();
+
+    // Push the shadow map as the current render target
+    PushRenderTarget(renderTarg);
+    fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 0);
+
+    // We'll be rendering the light space distance to the span fragment into
+    // alpha (color is white), so our camera space position, transformed into light space
+    // and then converted to [0..255] via our ULut.
+
+    //METAL NOTE: D3DTSS_TCI_CAMERASPACEPOSITION and D3DTTFF_COUNT3 are hardcoded into the shader
+
+    // Set texture transform to slave's lut transform. See plShadowMaster::IComputeLUT().
+    hsMatrix44 castLUT = slave->fCastLUT;
+    if( slave->fFlags & plShadowSlave::kCastInCameraSpace )
+    {
+        hsMatrix44 c2w = GetCameraToWorld();
+
+        castLUT = castLUT * c2w;
+    }
+
+    simd_float4x4 tXfm;
+    hsMatrix2SIMD(castLUT, &tXfm);
+
+    fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm;
+    fCurrentRenderPassUniforms->uvTransforms[0].flags = 0;
+    fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition;
+
+    /*DWORD clearColor = 0xff000000L;
+//    const int l2NumSamples = kL2NumSamples; // HACKSAMPLE
+    const int l2NumSamples = mfCurrentTest > 101 ? 3 : 2;
+    DWORD intens;
+    if( slave->fBlurScale > 0 )
+    {
+        const int kNumSamples = mfCurrentTest > 101 ? 8 : 4;
+        int nPasses = (int)ceil(float(kNumSamples) / fMaxLayersAtOnce);
+        int nSamplesPerPass = kNumSamples / nPasses;
+        DWORD k = int(128.f / float(nSamplesPerPass));
+        intens = (0xff << 24)
+            | ((128 + k) << 16)
+            | ((128 + k) << 8)
+            | ((128 + k) << 0);
+        clearColor = (0xff << 24)
+            | ((128 - k) << 16)
+            | ((128 - k) << 8)
+            | ((128 - k) << 0);
+    }
+    else
+        intens = 0xffffffff;*/
+
+    // Note that we discard the shadow caster's alpha here, although we don't
+    // need to. Even on a 2 texture stage system, we could include the diffuse
+    // alpha and the texture alpha from the base texture. But we don't.
+
+    // Set color to white. We could accomplish this easier by making the color
+    // in our ULut white.
+    /*fD3DDevice->SetRenderState(D3DRS_TEXTUREFACTOR, intens);
+
+    fSettings.fVeryAnnoyingTextureInvalidFlag = true;
+    fD3DDevice->SetTextureStageState(0, D3DTSS_COLORARG1, D3DTA_TFACTOR);
+    fD3DDevice->SetTextureStageState(0, D3DTSS_COLOROP, D3DTOP_SELECTARG1);
+
+    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE);
+    fD3DDevice->SetTextureStageState(0, D3DTSS_ALPHAOP, D3DTOP_SELECTARG1);
+    fLayerState[0].fBlendFlags = uint32_t(-1);
+
+    // For stage 1 - disable
+    fLastEndingStage = 1;
+    fD3DDevice->SetTextureStageState(1, D3DTSS_COLOROP, D3DTOP_DISABLE);
+    fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE);
+    fLayerState[1].fBlendFlags = uint32_t(-1);*/
+
+    hsRefCnt_SafeAssign( fLayerRef[0], ref );
+    fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, Texture);
+
+    //fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE);
+    //fD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);
+    //fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO);
+
+    //fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS);
+
+    slave->fPipeData = renderTarg;
+
+    // Enable ZBuffering w/ write
+    //fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE);
+    // fLayerState[0].fZFlags &= ~hsGMatState::kZMask;
+
+    // Clear the render target:
+    // alpha to white ensures no shadow where there's no caster
+    // color to black in case we ever get blurring going
+    // Z to 1
+    // Stencil ignored
+    if( slave->ReverseZ() )
+    {
+        fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fReverseZStencilState);
+        //fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATEREQUAL);
+        //fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 0.0f, 0L);
+    }
+    else
+    {
+        fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fDefaultStencilState);
+    }
+
+    // Bring the viewport in (AFTER THE CLEAR) to protect the alpha boundary.
+    fView.GetViewTransform().SetViewPort(1, 1, (float)(slave->fWidth-2), (float)(slave->fHeight-2), false);
+    fDevice.SetViewport();
+
+    //inlEnsureLightingOff();
+
+    return true;
+}
+
+// ISetupShadowLight //////////////////////////////////////////////////////////////////
+// We use the shadow light to modulate the shadow effect in two ways while
+// projecting the shadow map onto the scene.
+// First, the intensity of the shadow follows the N dot L of the light on
+// the surface being projected onto. So on a sphere, the darkening effect
+// of the shadow will fall off as the normals go from pointing to the light to
+// pointing 90 degrees off.
+// Second, we attenuate the whole shadow effect through the lights diffuse color.
+// We attenuate for different reasons, like the intensity of the light, or
+// to fade out a shadow as it gets too far in the distance to matter.
+// The light is written into slot 0 of the current render pass uniforms;
+// every field not set below is zero.
+void plMetalPipeline::ISetupShadowLight(plShadowSlave* slave)
+{
+    //FIXME: Do we need to clear the fCurrentRenderPassUniforms->lampSources array?
+    //Feels like we could catch lights from a previous pass
+    plMetalShaderLightSource lRef = fCurrentRenderPassUniforms->lampSources[0];
+    memset(&lRef, 0, sizeof(lRef));
+
+    lRef.diffuse.r
+        = lRef.diffuse.g
+        = lRef.diffuse.b
+        = slave->fPower;
+
+    slave->fSelfShadowOn = false;
+
+    if( slave->Positional() )
+    {
+        hsPoint3 position = slave->fLightPos;
+        lRef.position.x = position.fX;
+        lRef.position.y = position.fY;
+        lRef.position.z = position.fZ;
+
+        //const float maxRange = 32767.f;
+        //lRef->fD3DInfo.Range = maxRange;
+        lRef.constAtten = 1.f;
+        lRef.linAtten = 0;
+        lRef.quadAtten = 0;
+
+        //lRef->fD3DInfo.Type = D3DLIGHT_POINT;
+        lRef.position.w = 1.0;
+    }
+    else
+    {
+        hsVector3 dir = slave->fLightDir;
+        lRef.direction.x = dir.fX;
+        lRef.direction.y = dir.fY;
+        lRef.direction.z = dir.fZ;
+
+        lRef.position.w = 0.0;
+    }
+
+    //fD3DDevice->SetLight( lRef->fD3DIndex, &lRef->fD3DInfo );
+    fCurrentRenderPassUniforms->lampSources[0] = lRef;
+
+    //Not sure how to link lights in Metal. Do we even need to?
+    //slave->fLightIndex = lRef->fD3DIndex;
+}
+
+
+// IFindRenderTarget //////////////////////////////////////////////////////////////////
+// Find a matching render target from the pools. We prefer the requested size, but
+// will look for a smaller size if there isn't one available.
+// Param ortho indicates whether it will be used for orthogonal projection as opposed
+// to perspective (directional light vs. point light), but is no longer used.
+plRenderTarget* plMetalPipeline::IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho)
+{
+    std::vector* pool = nil;
+    uint32_t* iNext = nil;
+    // NOT CURRENTLY SUPPORTING NON-SQUARE SHADOWS. IF WE DO, CHANGE THIS.
+ switch(height) + { + case 512: + pool = &fRenderTargetPool512; + iNext = &fRenderTargetNext[9]; + break; + case 256: + pool = &fRenderTargetPool256; + iNext = &fRenderTargetNext[8]; + break; + case 128: + pool = &fRenderTargetPool128; + iNext = &fRenderTargetNext[7]; + break; + case 64: + pool = &fRenderTargetPool64; + iNext = &fRenderTargetNext[6]; + break; + case 32: + pool = &fRenderTargetPool32; + iNext = &fRenderTargetNext[5]; + break; + default: + return nil; + } + plRenderTarget* rt = (*pool)[*iNext]; + if( !rt ) + { + // We didn't find one, try again the next size down. + if( height > 32 ) + return IFindRenderTarget(width >>= 1, height >>= 1, ortho); + + // We must be totally out. Oh well. + return nil; + } + (*iNext)++; + + return rt; +} + +//// SharedRenderTargetRef ////////////////////////////////////////////////////// +// Same as MakeRenderTargetRef, except specialized for the shadow map generation. +// The shadow map pools of a given dimension (called RenderTargetPool) all share +// a single depth buffer of that size. This allows sharing on NVidia hardware +// that wants the depth buffer dimensions to match the color buffer size. +// It may be that NVidia hardware doesn't care any more. Contact Matthias +// about that. +hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner) +{ + plMetalRenderTargetRef* ref = nil; + MTL::Texture* depthSurface = nil; + MTL::Texture* texture = nil; + MTL::Texture* cTexture = nil; + int i; + plCubicRenderTarget* cubicRT; + uint16_t width, height; + + // If we don't already have one to share from, start from scratch. + if( !share ) + return MakeRenderTargetRef(owner); + + //hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd"); + +#ifdef HS_DEBUGGING + // Check out the validity of the match. Debug only. 
+ hsAssert(!owner->GetParent() == !share->GetParent(), "Mismatch on shared render target"); + hsAssert(owner->GetWidth() == share->GetWidth(), "Mismatch on shared render target"); + hsAssert(owner->GetHeight() == share->GetHeight(), "Mismatch on shared render target"); + hsAssert(owner->GetZDepth() == share->GetZDepth(), "Mismatch on shared render target"); + hsAssert(owner->GetStencilDepth() == share->GetStencilDepth(), "Mismatch on shared render target"); +#endif // HS_DEBUGGING + + /// Check--is this renderTarget really a child of a cubicRenderTarget? + if( owner->GetParent() != nil ) + { + /// This'll create the deviceRefs for all of its children as well + SharedRenderTargetRef(share->GetParent(), owner->GetParent()); + return owner->GetDeviceRef(); + } + + if( owner->GetDeviceRef() != nil ) + ref = (plMetalRenderTargetRef *)owner->GetDeviceRef(); + + // Look for a good format of matching color and depth size. + //FIXME: we're hardcoded for a certain tier and we aren't trying to create matching render buffers for efficiency + //if( !IFindRenderTargetInfo(owner, surfFormat, resType) ) + //{ + // hsAssert( false, "Error getting renderTarget info" ); + // return nil; + //} + + + /// Create the render target now + // Start with the depth. We're just going to share the depth surface on the + // input shareRef. + plMetalRenderTargetRef* shareRef = (plMetalRenderTargetRef*)share->GetDeviceRef(); + hsAssert(shareRef, "Trying to share from a render target with no ref"); + depthSurface = shareRef->fDepthBuffer; + + //FIXME: Add the usage to these textures, they're only accessed by the GPU + // Check for Cubic. This is unlikely, since this function is currently only + // used for the shadow map pools. 
+ cubicRT = plCubicRenderTarget::ConvertNoRef( owner ); + if( cubicRT != nil ) + { + /// And create the ref (it'll know how to set all the flags) + if( ref != nil ) + ref->SetOwner(owner); + else { + ref = new plMetalRenderTargetRef(); + ref->SetOwner(owner); + } + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatRGBA8Uint, owner->GetWidth(), false); + MTL::Texture* cubeTexture = fDevice.fMetalDevice->newTexture(textureDescriptor); + + // hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd"); + if( cubeTexture ) + { + + /// Create a CUBIC texture + for( i = 0; i < 6; i++ ) + { + plRenderTarget *face = cubicRT->GetFace( i ); + plMetalRenderTargetRef *fRef; + + if( face->GetDeviceRef() != nil ) + { + fRef = (plMetalRenderTargetRef *)face->GetDeviceRef(); + fRef->SetOwner(face); + if( !fRef->IsLinked() ) + fRef->Link( &fRenderTargetRefList ); + } + else + { + plMetalRenderTargetRef* targetRef = new plMetalRenderTargetRef(); + targetRef->SetOwner(face); + face->SetDeviceRef( targetRef ); + ( (plMetalRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList ); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef( face->GetDeviceRef() ); + } + } + + ref->fTexture = cubeTexture; + } + else + { + hsRefCnt_SafeUnRef(ref); + ref = nil; + } + } + // Is it a texture render target? Probably, since shadow maps are all we use this for. + else if( owner->GetFlags() & plRenderTarget::kIsTexture || owner->GetFlags() & plRenderTarget::kIsOffscreen) + { + //DX seperated the onscreen and offscreen types. Metal doesn't care. All render targets are textures. 
+ /// Create a normal texture + if( ref != nil ) + ref->SetOwner(owner); + else { + ref = new plMetalRenderTargetRef(); + ref->SetOwner(owner); + } + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), owner->GetHeight(), false); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + if( texture ) + { + ref->fTexture = texture; + } + else + { + hsRefCnt_SafeUnRef(ref); + ref = nil; + } + + if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) { + MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + owner->GetWidth(), + owner->GetHeight(), + false); + if(fDevice.fMetalDevice->hasUnifiedMemory()) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } + depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + MTL::Texture *depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor); + ref->fDepthBuffer = depthBuffer; + } + } + + if( owner->GetDeviceRef() != ref ) + { + owner->SetDeviceRef( ref ); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef( ref ); + if( ref != nil && !ref->IsLinked() ) + ref->Link( &fRenderTargetRefList ); + } + else + { + if( ref != nil && !ref->IsLinked() ) + ref->Link( &fRenderTargetRefList ); + } + + if( ref != nil ) + { + ref->SetDirty( false ); + } + + return ref; +} + +// IMakeRenderTargetPools ///////////////////////////////////////////////////////////// +// These are actually only used as shadow map pools, but they could be used for other +// render targets. 
+// All these are created here in a single call because they go in POOL_DEFAULT, so they +// must be created before we start creating things in POOL_MANAGED. +void plMetalPipeline::IMakeRenderTargetPools() +{ + //FIXME: We should probably have a release function for the render target pools + //IReleaseRenderTargetPools(); // Just to be sure. + + // Numbers of render targets to be created for each size. + // These numbers were set with multi-player in mind, so should be reconsidered. + // But do keep in mind that there are many things in production assets that cast + // shadows besides the avatar. + plConst(float) kCount[kMaxRenderTargetNext] = { + 0, // 1x1 + 0, // 2x2 + 0, // 4x4 + 0, // 8x8 + 0, // 16x16 + 32, // 32x32 + 16, // 64x64 + 8, // 128x128 + 4, // 256x256 + 0 // 512x512 + }; + int i; + for( i = 0; i < kMaxRenderTargetNext; i++ ) + { + std::vector* pool = nil; + switch( i ) + { + default: + case 0: + case 1: + case 2: + case 3: + case 4: + break; + + case 5: + pool = &fRenderTargetPool32; + break; + case 6: + pool = &fRenderTargetPool64; + break; + case 7: + pool = &fRenderTargetPool128; + break; + case 8: + pool = &fRenderTargetPool256; + break; + case 9: + pool = &fRenderTargetPool512; + break; + } + if( pool ) + { + pool->resize(kCount[i] + 1); + (*pool)[0] = nil; + (*pool)[(int)(kCount[i])] = nil; + int j; + for( j = 0; j < kCount[i]; j++ ) + { + uint16_t flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected; + uint8_t bitDepth = 32; + uint8_t zDepth = 24; + uint8_t stencilDepth = 0; + + // If we ever allow non-square shadows, change this. + int width = 1 << i; + int height = width; + + plRenderTarget* rt = new plRenderTarget(flags, width, height, bitDepth, zDepth, stencilDepth); + + // If we've failed to create our render target ref, we're probably out of + // video memory. We'll return nil, and this guy just doesn't get a shadow + // until more video memory turns up (not likely). 
+ if( !SharedRenderTargetRef((*pool)[0], rt) ) + { + delete rt; + pool->resize(j+1); + (*pool)[j] = nil; + break; + } + (*pool)[j] = rt; + } + } + } +} + +// IPopShadowCastState /////////////////////////////////////////////////// +// Pop the state set to render this shadow caster, so we're ready to render +// a different shadow caster, or go on to our main render. +bool plMetalPipeline::IPopShadowCastState(plShadowSlave* slave) +{ + fView = fViewStack.top(); + fViewStack.pop(); + + PopRenderTarget(); + fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera; + + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(nullptr, 16); + + return true; +} + +// IResetRenderTargetPools ///////////////////////////////////////////////////////////////// +// No release of resources, this just resets for the start of a frame. So if a shadow +// slave gets a render target from a pool, once this is called (conceptually at the +// end of the frame), the slave no longer owns that render target. +void plMetalPipeline::IResetRenderTargetPools() +{ + int i; + for( i = 0; i < kMaxRenderTargetNext; i++ ) + { + fRenderTargetNext[i] = 0; + //fBlurScratchRTs[i] = nil; + //fBlurDestRTs[i] = nil; + } + + //fLights.fNextShadowLight = 0; +} + +// IRenderShadowCasterSpan ////////////////////////////////////////////////////////////////////// +// Render the span into a rendertarget of the correct size, generating +// a depth map from this light to that span. +void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span) +{ + // Check that it's ready to render. 
+ plProfile_BeginTiming(CheckDyn); + ICheckDynBuffers(drawable, drawable->GetBufferGroup(span.fGroupIdx), &span); + plProfile_EndTiming(CheckDyn); + + plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef *)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx); + plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef *)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx); + + if( vRef->GetBuffer() == nil || iRef->GetBuffer() == nil ) + { + hsAssert( false, "Trying to render a nil buffer pair!" ); + return; + } + + /// Switch to the vertex buffer we want + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = fDevice.pipelineStateFor(vRef, hsGMatState::kBlendAlpha, fCurrentRenderPassUniforms->numUVSrcs, plShaderID::ID(0), plShaderID::ID(0), true); + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); + fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); + + uint32_t vStart = span.fVStartIdx; + uint32_t vLength = span.fVLength; + uint32_t iStart = span.fIPackedIdx; + uint32_t iLength= span.fILength; + + plRenderTriListFunc render(&fDevice, 0, vStart, vLength, iStart, iLength); + + static hsMatrix44 emptyMatrix; + hsMatrix44 m = emptyMatrix; + + ISetupTransforms(drawable, span, NULL, m); + + bool flip = slave->ReverseCull(); + ISetCullMode(flip); + + render.RenderPrims(); +} + + +// IRenderShadowsOntoSpan ///////////////////////////////////////////////////////////////////// +// After doing the usual render for a span (all passes), we call the following. +// If the span accepts shadows, this will loop over all the shadows active this +// frame, and apply the ones that intersect this spans bounds. See below for details. 
+void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef *vRef) +{ + // We've already computed which shadows affect this span. That's recorded in slaveBits. + const hsBitVector& slaveBits = span->GetShadowSlaves(); + + bool first = true; + + int i; + for( i = 0; i < fShadows.size(); i++ ) + { + if( slaveBits.IsBitSet(fShadows[i]->fIndex) ) + { + // This slave affects this span. + if( first ) + { + + // On the first, we do all the setup that is independent of + // the shadow slave, so state that needs to get set once before + // projecting any number of shadow maps. + ISetupShadowRcvTextureStages(mat); + + first = false; + + } + + // Now setup any state specific to this shadow slave. + ISetupShadowSlaveTextures(fShadows[i]); + + // See ISetupShadowLight below for how the shadow light is used. + // The shadow light isn't used in generating the shadow map, it's used + // in projecting the shadow map onto the scene. + ISetupShadowLight(fShadows[i]); + + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = fDevice.pipelineStateFor(vRef, hsGMatState::kBlendAlpha, fCurrentRenderPassUniforms->numUVSrcs, plShaderID::ID(0), plShaderID::ID(0), 2); + if(fCurrentPipelineState != linkedPipeline->pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + fCurrentPipelineState = linkedPipeline->pipelineState; + } + + int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex); + + // We vary the shadow intensity when self shadowing (see below), + // so we cache whether the shadow light is set for regular or + // self shadowing intensity. If what we're doing now is different + // than what we're currently set for, set it again. 
+ //if( selfShadowNow != fShadows[i]->fSelfShadowOn ) + //{ + plMetalShaderLightSource lRef = fCurrentRenderPassUniforms->lampSources[0]; + + // We lower the power on self shadowing, because the artists like to + // crank up the shadow strength to huge values to get a darker shadow + // on the environment, which causes the shadow on the avatar to get + // way too dark. Another way to look at it is when self shadowing, + // the surface being projected onto is going to be very close to + // the surface casting the shadow (because they are the same object). + if( selfShadowNow ) + { + plConst(float) kMaxSelfPower = 0.3f; + float power = (float) fShadows[i]->fPower > kMaxSelfPower ? (float) kMaxSelfPower : ((float) fShadows[i]->fPower); + lRef.diffuse.r = lRef.diffuse.b = lRef.diffuse.g = power; + } + else + { + lRef.diffuse.r = lRef.diffuse.b = lRef.diffuse.g = fShadows[i]->fPower; + } + lRef.scale = 1.0; + fCurrentRenderPassUniforms->lampSources[0] = lRef; + + // record which our intensity is now set for. + fShadows[i]->fSelfShadowOn = selfShadowNow; + //} + + // Enable the light. + //fD3DDevice->LightEnable(fShadows[i]->fLightIndex, true);*/ + +#ifndef PLASMA_EXTERNAL_RELEASE + if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply)) +#endif // PLASMA_EXTERNAL_RELEASE + render.RenderPrims(); + + // Disable it again. + //fD3DDevice->LightEnable(fShadows[i]->fLightIndex, false); + + } + } + +} + + +// ISetupShadowRcvTextureStages //////////////////////////////////////////// +// Set the generic stage states. We'll fill in the specific textures +// for each slave later. +void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) +{ + //Do this first, this normally stomps all over our uniforms + //FIXME: Way to encode layers without stomping all over uniforms? 
+ plMetalMaterialShaderRef* matShader = (plMetalMaterialShaderRef *)mat->GetDeviceRef(); + //matShader->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, 0, 0, nullptr); + + // We're whacking about with renderstate independent of current material, + // so make sure the next span processes it's material, even if it's the + // same one. + fForceMatHandle = true; + + // Set the D3D lighting/material model + ISetShadowLightState(mat); + + // Zbuffering on read-only + + + if(fCurrentDepthStencilState != fDevice.fNoZWriteStencilState) { + fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZWriteStencilState); + fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; + } + + plMetalTextureRef* ref = IGetULutTextureRef(); + if( !ref->fTexture ) + { + //if( ref->fData ) + // IReloadTexture(ref); + } + hsRefCnt_SafeAssign(fLayerRef[1], ref); + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 17); + + int numUVSrcs = 2; + + int layerIndex = -1; + // If mat's base layer is alpha'd, and we have > 3 TMU's factor + // in the base layer's alpha. + if( (fMaxLayersAtOnce > 3) && mat->GetLayer(0)->GetTexture() && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha) ) + { + plLayerInterface* layer = mat->GetLayer(0); + layerIndex = 0; + + + + // If the following conditions are met, it means that layer 1 is a better choice to + // get the transparency from. The specific case we're looking for is vertex alpha + // simulated by an invisible second layer alpha LUT (known as the alpha hack). + if( (layer->GetMiscFlags() & hsGMatState::kMiscBindNext) + && mat->GetLayer(1) + && !(mat->GetLayer(1)->GetMiscFlags() & hsGMatState::kMiscNoShadowAlpha) + && !(mat->GetLayer(1)->GetBlendFlags() & hsGMatState::kBlendNoTexAlpha) + && mat->GetLayer(1)->GetTexture() ) { + layer = mat->GetLayer(1); + layerIndex = 1; + } + + // Normal UVW source. + uint32_t uvwSrc = layer->GetUVWSrc(); + + // Normal UVW source. 
+ fCurrentRenderPassUniforms->uvTransforms[2].UVWSrc = uvwSrc; + // MiscFlags to layer's misc flags + fCurrentRenderPassUniforms->uvTransforms[2].flags = layer->GetMiscFlags(); + matrix_float4x4 tXfm; + hsMatrix2SIMD(layer->GetTransform(), &tXfm); + fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm; + + numUVSrcs++; + } + + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowAlphaSrc); + + fCurrentRenderPassUniforms->numUVSrcs = numUVSrcs; +} + +// ISetShadowLightState ////////////////////////////////////////////////////////////////// +// Set the D3D lighting/material model for projecting the shadow map onto this material. +void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) +{ + IDisableLightsForShadow(); + //inlEnsureLightingOn(); + + fCurrLightingMethod = plSpan::kLiteShadow; + + if( mat && mat->GetNumLayers() && mat->GetLayer(0) ) + fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = mat->GetLayer(0)->GetOpacity(); + else + fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = 1.f; + fCurrentRenderPassUniforms->diffuseCol.a = 1.f; + + fCurrentRenderPassUniforms->diffuseSrc = 1.0; + fCurrentRenderPassUniforms->emissiveSrc = 1.0; + fCurrentRenderPassUniforms->emissiveCol = 0.0; + fCurrentRenderPassUniforms->specularSrc = 0.0; + fCurrentRenderPassUniforms->ambientSrc = 0.0; + fCurrentRenderPassUniforms->globalAmb = 0.0; + + //fD3DDevice->SetMaterial(&d3dMat); + //fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );*/ +} + +// IDisableLightsForShadow /////////////////////////////////////////////////////////// +// Disable any lights that are enabled. We'll only want the shadow light illuminating +// the surface. 
+void plMetalPipeline::IDisableLightsForShadow() +{ + int i; + for( i = 0; i < 8; i++ ) + { + IDisableLight(nullptr, i); + } +} + +// ISetupShadowSlaveTextures ////////////////////////////////////////////// +// Set any state specific to this shadow slave for projecting the slave's +// shadow map onto the surface. +void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) +{ + //D3DMATRIX tXfm; + + hsMatrix44 c2w = GetCameraToWorld(); + + // Stage 0: + // Set Stage 0's texture to the slave's rendertarget. + // Set texture transform to slave's camera to texture transform + plRenderTarget* renderTarg = (plRenderTarget*)slave->fPipeData; + hsAssert(renderTarg, "Processing a slave that hasn't been rendered"); + if( !renderTarg ) + return; + plMetalTextureRef* ref = (plMetalTextureRef*)renderTarg->GetDeviceRef(); + hsAssert(ref, "Shadow map ref should have been made when it was rendered"); + if( !ref ) + return; + + hsRefCnt_SafeAssign( fLayerRef[0], ref ); + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 16); + + plMetalShadowCastFragmentShaderArgumentBuffer uniforms; + uniforms.pointLightCast = slave->fView.GetOrthogonal() ? 
false : true; + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&uniforms, sizeof(plMetalShadowCastFragmentShaderArgumentBuffer), BufferIndexShadowCastFragArgBuffer); + + hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w; + simd_float4x4 tXfm; + hsMatrix2SIMD(cameraToTexture, &tXfm); + + fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition; + fCurrentRenderPassUniforms->uvTransforms[0].flags = 0; + fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; + + // Stage 1: the lut + // Set the texture transform to slave's fRcvLUT + hsMatrix44 cameraToLut = slave->fRcvLUT * c2w; + hsMatrix2SIMD(cameraToLut, &tXfm); + + fCurrentRenderPassUniforms->uvTransforms[1].UVWSrc = plLayerInterface::kUVWPosition; + fCurrentRenderPassUniforms->uvTransforms[1].flags = 0; + fCurrentRenderPassUniforms->uvTransforms[1].transform = tXfm; + +} + +/////////////////////////////////////////////////////////////////////////////// +//// View Stuff /////////////////////////////////////////////////////////////// +/////////////////////////////////////////////////////////////////////////////// + +//// IIsViewLeftHanded //////////////////////////////////////////////////////// +// Returns true if the combination of the local2world and world2camera +// matrices is left-handed. + +bool plMetalPipeline::IIsViewLeftHanded() +{ + return fView.GetViewTransform().GetOrthogonal() ^ ( fView.fLocalToWorldLeftHanded ^ fView.fWorldToCamLeftHanded ) ? true : false; +} + +//// ISetCullMode ///////////////////////////////////////////////////////////// +// Tests and sets the current winding order cull mode (CW, CCW, or none). +// Will reverse the cull mode as necessary for left handed camera or local to world +// transforms. +void plMetalPipeline::ISetCullMode(bool flip) +{ + MTL::CullMode newCullMode = !IIsViewLeftHanded() ^ !flip ? 
MTL::CullModeFront : MTL::CullModeBack; + fDevice.CurrentRenderCommandEncoder()->setCullMode(newCullMode); + fCurrentCullMode = newCullMode; +} + +plMetalDevice* plMetalPipeline::GetMetalDevice() +{ + return &fDevice; +} + +//// Local Static Stuff /////////////////////////////////////////////////////// + +//FIXME: CPU avatar stuff that should be evaluated once this moves onto the GPU. + +template +static inline void inlCopy(uint8_t*& src, uint8_t*& dst) +{ + T* src_ptr = reinterpret_cast(src); + T* dst_ptr = reinterpret_cast(dst); + *dst_ptr = *src_ptr; + src += sizeof(T); + dst += sizeof(T); +} + +template +static inline const uint8_t* inlExtract(const uint8_t* src, T* val) +{ + const T* ptr = reinterpret_cast(src); + *val = *ptr++; + return reinterpret_cast(ptr); +} + +template<> +inline const uint8_t* inlExtract(const uint8_t* src, hsPoint3* val) +{ + const float* src_ptr = reinterpret_cast(src); + float* dst_ptr = reinterpret_cast(val); + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr = 1.f; + return reinterpret_cast(src_ptr); +} + +template<> +inline const uint8_t* inlExtract(const uint8_t* src, hsVector3* val) +{ + const float* src_ptr = reinterpret_cast(src); + float* dst_ptr = reinterpret_cast(val); + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr = 0.f; + return reinterpret_cast(src_ptr); +} + +template +static inline void inlSkip(uint8_t*& src) +{ + src += sizeof(T) * N; +} + +template +static inline uint8_t* inlStuff(uint8_t* dst, const T* val) +{ + T* ptr = reinterpret_cast(dst); + *ptr++ = *val; + return reinterpret_cast(ptr); +} + +//// ISoftwareVertexBlend /////////////////////////////////////////////////////// +// Emulate matrix palette operations in software. 
The big difference between the hardware +// and software versions is we only want to lock the vertex buffer once and blend all the +// verts we're going to in software, so the vertex blend happens once for an entire drawable. +// In hardware, we want the opposite, to break it into managable chunks, manageable meaning +// few enough matrices to fit into hardware registers. So for hardware version, we set up +// our palette, draw a span or few, setup our matrix palette with new matrices, draw, repeat. +bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std::vector& visList) +{ + if (IsDebugFlagSet(plPipeDbg::kFlagNoSkinning)) + return true; + + if (drawable->GetSkinTime() == fRenderCnt) + return true; + + const hsBitVector& blendBits = drawable->GetBlendingSpanVector(); + + if (drawable->GetBlendingSpanVector().Empty()) { + // This sucker doesn't have any skinning spans anyway. Just return + drawable->SetSkinTime(fRenderCnt); + return true; + } + + plProfile_BeginTiming(Skin); + + // lock the data buffer + + // First, figure out which buffers we need to blend. + const int kMaxBufferGroups = 20; + const int kMaxVertexBuffers = 20; + static char blendBuffers[kMaxBufferGroups][kMaxVertexBuffers]; + memset(blendBuffers, 0, kMaxBufferGroups * kMaxVertexBuffers * sizeof(**blendBuffers)); + + hsAssert(kMaxBufferGroups >= drawable->GetNumBufferGroups(), "Bigger than we counted on num groups skin."); + + const std::vector& spans = drawable->GetSpanArray(); + int i; + for (i = 0; i < visList.size(); i++) { + if (blendBits.IsBitSet(visList[i])) { + const plVertexSpan &vSpan = *(plVertexSpan *)spans[visList[i]]; + hsAssert(kMaxVertexBuffers > vSpan.fVBufferIdx, "Bigger than we counted on num buffers skin."); + + blendBuffers[vSpan.fGroupIdx][vSpan.fVBufferIdx] = 1; + drawable->SetBlendingSpanVectorBit(visList[i], false); + } + } + + // Now go through each of the group/buffer (= a real vertex buffer) pairs we found, + // and blend into it. 
We'll lock the buffer once, and then for each span that + // uses it, set the matrix palette and and then do the blend for that span. + // When we've done all the spans for a group/buffer, we unlock it and move on. + int j; + for( i = 0; i < kMaxBufferGroups; i++ ) + { + for( j = 0; j < kMaxVertexBuffers; j++ ) + { + if( blendBuffers[i][j] ) + { + // Found one. Do the lock. + plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)drawable->GetVertexRef(i, j); + + hsAssert(vRef->fData, "Going into skinning with no place to put results!"); + + uint8_t* destPtr = vRef->fData; + + int k; + for (k = 0; k < visList.size(); k++) { + const plIcicle& span = *(plIcicle*)spans[visList[k]]; + if (span.fGroupIdx == i && span.fVBufferIdx == j) { + plProfile_Inc(NumSkin); + + hsMatrix44* matrixPalette = drawable->GetMatrixPalette(span.fBaseMatrix); + matrixPalette[0] = span.fLocalToWorld; + + uint8_t* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex); + ptr += span.fVStartIdx * vRef->fOwner->GetVertexSize(); + IBlendVertBuffer( (plSpan*)&span, + matrixPalette, span.fNumMatrices, + ptr, + vRef->fOwner->GetVertexFormat(), + vRef->fOwner->GetVertexSize(), + destPtr + span.fVStartIdx * vRef->fVertexSize, + vRef->fVertexSize, + span.fVLength, + span.fLocalUVWChans ); + vRef->SetDirty(true); + } + } + // Unlock and move on. + } + } + } + + plProfile_EndTiming(Skin); + + if (drawable->GetBlendingSpanVector().Empty()) { + // Only do this if we've blended ALL of the spans. Thus, this becomes a trivial + // rejection for all the skinning flags being cleared + drawable->SetSkinTime(fRenderCnt); + } + + return true; +} + + +//// IBlendVertsIntoBuffer //////////////////////////////////////////////////// +// Given a pointer into a buffer of verts that have blending data in the D3D +// format, blends them into the destination buffer given without the blending +// info. 
+
+// Only src, format, dest, count, matrixPalette and localUVWChans are read below;
+// span, numMatrices, srcStride and destStride are accepted but unused here.
+void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMatrices,
+                                       const uint8_t* src, uint8_t format, uint32_t srcStride,
+                                       uint8_t* dest, uint32_t destStride, uint32_t count,
+                                       uint16_t localUVWChans)
+{
+    // These buffers are read back as simd_float4 below, so they need 16-byte alignment.
+    alignas(16) float pt_buf[] = { 0.f, 0.f, 0.f, 1.f };
+    alignas(16) float vec_buf[] = { 0.f, 0.f, 0.f, 0.f };
+    hsPoint3* pt = reinterpret_cast<hsPoint3*>(pt_buf);
+    hsVector3* vec = reinterpret_cast<hsVector3*>(vec_buf);
+
+    uint32_t indices;
+    float weights[4];
+
+    // Dropped support for localUVWChans at templatization of code
+    hsAssert(localUVWChans == 0, "support for skinned UVWs dropped. reimplement me?");
+    const size_t uvChanSize = plGBufferGroup::CalcNumUVs(format) * sizeof(float) * 3;
+    const uint8_t numWeights = (format & plGBufferGroup::kSkinWeightMask) >> 4;
+
+    simd_float4x4 simdMatrix;
+
+    for (uint32_t i = 0; i < count; ++i) {
+        // Extract data
+        src = inlExtract<hsPoint3>(src, pt);
+
+        float weightSum = 0.f;
+        for (uint8_t j = 0; j < numWeights; ++j) {
+            src = inlExtract<float>(src, &weights[j]);
+            weightSum += weights[j];
+        }
+        // The last weight is implicit: whatever is left to reach 1.
+        weights[numWeights] = 1.f - weightSum;
+
+        if (format & plGBufferGroup::kSkinIndices)
+            src = inlExtract<uint32_t>(src, &indices);
+        else
+            indices = 1 << 8; // palette entries 0 and 1
+        src = inlExtract<hsVector3>(src, vec);
+
+        // The source point and normal do not change across the weights: load them once.
+        const simd_float4 srcPt = *reinterpret_cast<const simd_float4*>(pt_buf);
+        const simd_float4 srcVec = *reinterpret_cast<const simd_float4*>(vec_buf);
+
+        // Destination buffers (float4 for SIMD alignment)
+        simd_float4 destNorm_buf = (simd_float4){ 0.f, 0.f, 0.f, 0.f };
+        simd_float4 destPt_buf = (simd_float4){ 0.f, 0.f, 0.f, 1.f };
+
+        // Blend
+        for (uint32_t j = 0; j <= numWeights; ++j) {
+            if (weights[j]) {
+                // Note: This bit is different than GL/DirectX; it goes through the simd library.
+                // Only convert the palette entry when it actually contributes.
+                hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix);
+                destPt_buf += weights[j] * simd_mul(simdMatrix, srcPt);
+                destNorm_buf += weights[j] * simd_mul(simdMatrix, srcVec);
+            }
+            indices >>= 8; // next 8-bit palette index
+        }
+        // Probably don't really need to renormalize this. The errors are
+        // going to be subtle and "smooth".
+        /* hsFastMath::NormalizeAppr(destNorm); */
+
+        // Slam data into position now
+        dest = inlStuff<hsPoint3>(dest, reinterpret_cast<hsPoint3*>(&destPt_buf));
+        dest = inlStuff<hsVector3>(dest, reinterpret_cast<hsVector3*>(&destNorm_buf));
+
+        // Jump past colors and UVws
+        dest += sizeof(uint32_t) * 2 + uvChanSize;
+        src += sizeof(uint32_t) * 2 + uvChanSize;
+    }
+}
+
+//Resource checking
+
+// CheckTextureRef //////////////////////////////////////////////////////
+// Make sure the given layer's texture has a device ref that is linked into
+// fTextureRefList, has its resources created, and is refilled when dirty.
+// Does nothing when the layer has no texture.
+void plMetalPipeline::CheckTextureRef(plLayerInterface* layer)
+{
+    plBitmap* bitmap = layer->GetTexture();
+
+    if (bitmap) {
+        plMetalTextureRef* tRef = static_cast<plMetalTextureRef*>(bitmap->GetDeviceRef());
+
+        if (!tRef) {
+            tRef = new plMetalTextureRef();
+
+            fDevice.SetupTextureRef(layer, bitmap, tRef);
+        }
+
+        if (!tRef->IsLinked()) {
+            tRef->Link(&fTextureRefList);
+        }
+
+        // Make sure it has all resources created.
+        fDevice.CheckTexture(tRef);
+
+        // If it's dirty, refill it.
+        if (tRef->IsDirty()) {
+            plMipmap* mip = plMipmap::ConvertNoRef(bitmap);
+            if (mip) {
+                fDevice.MakeTextureRef(tRef, layer, mip);
+                return;
+            }
+
+            plCubicEnvironmap* cubic = plCubicEnvironmap::ConvertNoRef(bitmap);
+            if (cubic) {
+                fDevice.MakeCubicTextureRef(tRef, layer, cubic);
+                return;
+            }
+        }
+    }
+}
+
+// CheckVertexBufferRef /////////////////////////////////////////////////////
+// Make sure the buffer group has a valid buffer ref and that it is up to date.
+void plMetalPipeline::CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx)
+{
+    // First, do we have a device ref at this index?
+    plMetalVertexBufferRef* vRef = static_cast<plMetalVertexBufferRef*>(owner->GetVertexBufferRef(idx));
+
+    // If not
+    if (!vRef) {
+        // Make the blank ref
+        vRef = new plMetalVertexBufferRef();
+
+        fDevice.SetupVertexBufferRef(owner, idx, vRef);
+    }
+
+    if (!vRef->IsLinked()) {
+        vRef->Link(&fVtxBuffRefList);
+    }
+
+    // One way or another, we now have a vbufferref[idx] in owner.
+    // Now, does it need to be (re)filled?
+    // If the owner is volatile, then we hold off. It might not
+    // be visible, and we might need to refill it again if we
+    // have an overrun of our dynamic buffer.
+    if (!vRef->Volatile()) {
+        // If it's a static buffer, allocate a vertex buffer for it.
+        fDevice.CheckStaticVertexBuffer(vRef, owner, idx);
+
+        // Might want to remove this assert, and replace it with a dirty check
+        // if we have static buffers that change very seldom rather than never.
+        hsAssert(!vRef->IsDirty(), "Non-volatile vertex buffers should never get dirty");
+    }
+    else
+    {
+        // Make sure we're going to be ready to fill it.
+        if (!vRef->fData && (vRef->fFormat != owner->GetVertexFormat()))
+        {
+            vRef->fData = new uint8_t[vRef->fCount * vRef->fVertexSize];
+            fDevice.FillVolatileVertexBufferRef(vRef, owner, idx);
+        }
+    }
+}
+
+// ISetupVertexBufferRef /////////////////////////////////////////////////////////
+// Initialize input vertex buffer ref according to source, store it on the
+// owner at idx, then drop this function's reference to it.
+void plMetalPipeline::ISetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalVertexBufferRef* vRef)
+{
+
+    uint8_t format = owner->GetVertexFormat();
+
+    // All indexed skinning is currently done on CPU, so the source data
+    // will have indices, but we strip them out for the device buffer.
+    if( format & plGBufferGroup::kSkinIndices )
+    {
+        format &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices);
+        format |= plGBufferGroup::kSkinNoWeights; // Should do nothing, but just in case...
+        vRef->SetSkinned(true);
+        vRef->SetVolatile(true);
+    }
+
+    uint32_t vertSize = IGetBufferFormatSize(format); // vertex stride
+    uint32_t numVerts = owner->GetVertBufferCount(idx);
+
+    vRef->fOwner = owner;
+    vRef->fCount = numVerts;
+    vRef->fVertexSize = vertSize;
+    vRef->fFormat = format;
+    vRef->fRefTime = 0;
+
+    vRef->SetDirty(true);
+    vRef->SetRebuiltSinceUsed(true);
+    vRef->fData = nullptr;
+
+    // Stay volatile if skinning flagged it above, or if the owner's verts are volatile.
+    vRef->SetVolatile(vRef->Volatile() || owner->AreVertsVolatile());
+
+    vRef->fIndex = idx;
+
+    owner->SetVertexBufferRef(idx, vRef);
+    hsRefCnt_SafeUnRef(vRef);
+}
+
+// CheckIndexBufferRef /////////////////////////////////////////////////////
+// Make sure the buffer group has an index buffer ref and that its data is current.
+void plMetalPipeline::CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx)
+{
+    plMetalIndexBufferRef* iRef = static_cast<plMetalIndexBufferRef*>(owner->GetIndexBufferRef(idx));
+
+    if (!iRef) {
+        // Create one from scratch.
+        iRef = new plMetalIndexBufferRef();
+
+        fDevice.SetupIndexBufferRef(owner, idx, iRef);
+    }
+
+    if (!iRef->IsLinked()) {
+        iRef->Link(&fIdxBuffRefList);
+    }
+
+    // Make sure it has all resources created.
+    fDevice.CheckIndexBuffer(iRef);
+
+    // If it's dirty, refill it.
+    if (iRef->IsDirty()) {
+        fDevice.FillIndexBufferRef(iRef, owner, idx);
+    }
+}
+
+//// IGetBufferFormatSize /////////////////////////////////////////////////////
+// Calculate the vertex stride from the given format.
+uint32_t plMetalPipeline::IGetBufferFormatSize( uint8_t format ) const +{ + uint32_t size = sizeof( float ) * 6 + sizeof( uint32_t ) * 2; // Position and normal, and two packed colors + + + switch( format & plGBufferGroup::kSkinWeightMask ) + { + case plGBufferGroup::kSkinNoWeights: + break; + case plGBufferGroup::kSkin1Weight: + size += sizeof(float); + break; + default: + hsAssert( false, "Invalid skin weight value in IGetBufferFormatSize()" ); + } + + size += sizeof( float ) * 3 * plGBufferGroup::CalcNumUVs( format ); + + return size; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h new file mode 100644 index 0000000000..faad2c1ce5 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -0,0 +1,238 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plMetalPipeline_inc_ +#define _plMetalPipeline_inc_ + +#include "plPipeline/pl3DPipeline.h" +#include "plPipeline/hsG3DDeviceSelector.h" +#include "plMetalDevice.h" +#include +#include +#include "ShaderTypes.h" + +class plIcicle; +class plPlate; +class plMetalMaterialShaderRef; +class plAuxSpan; +class plMetalVertexShader; +class plMetalFragmentShader; +class plShadowCaster; + +const uint kMaxSkinWeightsPerMaterial = 3; + +//// Helper Classes /////////////////////////////////////////////////////////// + +//// The RenderPrimFunc lets you have one function which does a lot of stuff +// around the actual call to render whatever type of primitives you have, instead +// of duplicating everything because the one line to render is different. 
+class plRenderPrimFunc +{ +public: + virtual bool RenderPrims() const = 0; // return true on error +}; + +// Metal-backed pipeline. Derives from pl3DPipeline and declares overrides for the virtual hooks listed below. +class plMetalPipeline : public pl3DPipeline +{ +public: + //The actual client should set this callback so we can retrieve drawables from the window server + std::function currentDrawableCallback; + //caching the frag function here so that the shader compiler can quickly access it + MTL::Function* fFragFunction; + +protected: + + friend class plMetalDevice; + friend class plMetalPlateManager; + friend class plMetalMaterialShaderRef; + friend class plRenderTriListFunc; + + plMetalMaterialShaderRef* fMatRefList; + plMetalRenderTargetRef* fRenderTargetRefList; + +public: + plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode); + virtual ~plMetalPipeline(); + + CLASSNAME_REGISTER(plMetalPipeline); + GETINTERFACE_ANY(plMetalPipeline, plPipeline); + + /* All of these virtual methods are not implemented by pl3DPipeline and + * need to be re-implemented here!
+ */ + + /*** VIRTUAL METHODS ***/ + bool PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr=nullptr) override; + bool PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr=nullptr) override; + plTextFont* MakeTextFont(char* face, uint16_t size) override; + bool OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) override; + bool CloseAccess(plAccessSpan& acc) override; + void PushRenderRequest(plRenderRequest* req) override; + void PopRenderRequest(plRenderRequest* req) override; + void ClearRenderTarget(plDrawable* d) override; + void ClearRenderTarget(const hsColorRGBA* col = nullptr, const float* depth = nullptr) override; + hsGDeviceRef* MakeRenderTargetRef(plRenderTarget* owner) override; + bool BeginRender() override; + bool EndRender() override; + void RenderScreenElements() override; + bool IsFullScreen() const override; + void Resize(uint32_t width, uint32_t height) override; + void LoadResources() override; + bool SetGamma(float eR, float eG, float eB) override; + bool SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) override; + bool CaptureScreen(plMipmap* dest, bool flipVertical = false, uint16_t desiredWidth = 0, uint16_t desiredHeight = 0) override; + plMipmap* ExtractMipMap(plRenderTarget* targ) override; + void GetSupportedDisplayModes(std::vector *res, int ColorDepth = 32 ) override; + int GetMaxAnisotropicSamples() override; + int GetMaxAntiAlias(int Width, int Height, int ColorDepth) override; + void ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync = false) override; + void RenderSpans(plDrawableSpans* ice, const std::vector& visList) override; + void ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, plMetalMaterialShaderRef* mRef, hsMatrix44& lastL2W); + bool ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const
plSpan* spanBase); + bool IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef); + void IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, + hsGDeviceRef* ib, hsGMaterial* material, + uint32_t vStart, uint32_t vLength, + uint32_t iStart, uint32_t iLength); + void IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux); + void IRenderAuxSpans(const plSpan& span); + bool IHandleMaterial(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); + plMetalDevice* GetMetalDevice(); + + // Create and/or Refresh geometry buffers + void CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; + void CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) override; + void CheckTextureRef(plLayerInterface* lay) override; + + void ISetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalVertexBufferRef* vRef); + uint32_t IGetBufferFormatSize( uint8_t format ) const; +private: + MTL::RenderPipelineState* fPipelineState; + VertexUniforms* fCurrentRenderPassUniforms; + + //cache to prevent oversetting, Metal won't catch this for us and will encode extra work + MTL::RenderPipelineState* fCurrentPipelineState; + MTL::DepthStencilState* fCurrentDepthStencilState; + + void FindFragFunction(); + + void ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj = false); + void IEnableLight(plMetalMaterialShaderRef* mRef, size_t i, plLightInfo* light); + void IDisableLight(plMetalMaterialShaderRef* mRef, size_t i); + void IScaleLight(plMetalMaterialShaderRef* mRef, size_t i, float scale); + void ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan); + void IHandleBlendMode(hsGMatState flags); + void IHandleZMode(hsGMatState flags); + + void IDrawPlate(plPlate* plate); + void IPreprocessAvatarTextures(); + void IClearShadowSlaves(); + + void IReleaseDynDeviceObjects(); + bool
ICreateDynDeviceObjects(); + + bool IIsViewLeftHanded(); + void ISetCullMode(bool flip = false); + + plLayerInterface* IPushOverBaseLayer(plLayerInterface* li); + plLayerInterface* IPopOverBaseLayer(plLayerInterface* li); + plLayerInterface* IPushOverAllLayer(plLayerInterface* li); + plLayerInterface* IPopOverAllLayer(plLayerInterface* li); + + void IPushPiggyBacks(hsGMaterial* mat); + void IPopPiggyBacks(); + void IPushProjPiggyBack(plLayerInterface* li); + void IPopProjPiggyBacks(); + int ISetNumActivePiggyBacks(); + bool ICheckAuxBuffers(const plAuxSpan* span); + + void ISetPipeConsts(plShader* shader); + bool ISetShaders(const plMetalVertexBufferRef * vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader); + + bool ISoftwareVertexBlend(plDrawableSpans* drawable, const std::vector& visList); + void IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMatrices, + const uint8_t* src, uint8_t format, uint32_t srcStride, + uint8_t* dest, uint32_t destStride, uint32_t count, + uint16_t localUVWChans); + + plMetalVertexShader* fVShaderRefList; + plMetalFragmentShader* fPShaderRefList; + MTL::CullMode fCurrentCullMode; + + bool IPrepShadowCaster(const plShadowCaster* caster); + bool IRenderShadowCaster(plShadowSlave* slave); + void IPreprocessShadows(); + bool IPushShadowCastState(plShadowSlave* slave); + plRenderTarget* IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho); + bool IPopShadowCastState(plShadowSlave* slave); + void IResetRenderTargetPools(); + void IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span); + plMetalTextureRef* IGetULutTextureRef(); + plMetalTextureRef* fULutTextureRef; + void ISetupShadowLight(plShadowSlave* slave); + void IMakeRenderTargetPools(); + hsGDeviceRef* SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner); + void IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef *vRef);
+ void ISetupShadowRcvTextureStages(hsGMaterial* mat); + void ISetupShadowSlaveTextures(plShadowSlave* slave); + void ISetShadowLightState(hsGMaterial* mat); + void IDisableLightsForShadow(); + void IReleaseRenderTargetPools(); + void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); + + // Shadows + std::vector fRenderTargetPool512; + std::vector fRenderTargetPool256; + std::vector fRenderTargetPool128; + std::vector fRenderTargetPool64; + std::vector fRenderTargetPool32; + enum { kMaxRenderTargetNext = 10 }; + uint32_t fRenderTargetNext[kMaxRenderTargetNext]; + + std::vector fProjEach; + std::vector fProjAll; + + void PushCurrentLightSources(); + void PopCurrentLightSources(); + std::vector fLightSourceStack; +}; + +#endif // _plMetalPipeline_inc_ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp new file mode 100644 index 0000000000..38b9c4edba --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -0,0 +1,146 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include "plMetalPlateManager.h" +#include "plMetalPipeline.h" +#include +#include "ShaderTypes.h" + +plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) + : plPlateManager(pipe), + fVtxBuffer(0) +{ + //Compile the shaders and link our pipeline for plates + MTL::Library *library = pipe->fDevice.fMetalDevice->newDefaultLibrary(); + MTL::Function *fragFunction = library->newFunction( + NS::String::string("fragmentShader", NS::ASCIIStringEncoding) + ); + MTL::Function *vertFunction = library->newFunction( + NS::String::string("plateVertexShader", NS::ASCIIStringEncoding) + ); + MTL::RenderPipelineDescriptor *descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + descriptor->setFragmentFunction(fragFunction); + descriptor->setVertexFunction(vertFunction); + descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); + 
descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setDepthAttachmentPixelFormat(MTL::PixelFormatDepth32Float_Stencil8); + + //create the descriptor of the vertex array + MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); + vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(0)->setBufferIndex(VertexAttributePosition); + vertexDescriptor->attributes()->object(0)->setOffset(0); + vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(1)->setBufferIndex(VertexAttributeTexcoord); + vertexDescriptor->attributes()->object(1)->setOffset(0); + + vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 2); + vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 2); + + descriptor->setVertexDescriptor(vertexDescriptor); + + MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); + depthDescriptor->setDepthWriteEnabled(false); + fDepthState = pipe->fDevice.fMetalDevice->newDepthStencilState(depthDescriptor); + depthDescriptor->release(); + + NS::Error *error; + fPlateRenderPipelineState = pipe->fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error); + library->release(); + fragFunction->release(); + vertFunction->release(); + descriptor->release(); +} + +void plMetalPlateManager::ICreateGeometry() +{ + plMetalPipeline *pipeline = (plMetalPipeline *)fOwner; + if(!fVtxBuffer) { + struct plateVertexBuffer 
vertexBuffer; + vertexBuffer.vertices[0].Set(-0.5f, -0.5f); + vertexBuffer.uv[0].Set(0.0f, 0.0f); + + vertexBuffer.vertices[1].Set(-0.5f, 0.5f); + vertexBuffer.uv[1].Set(0.0f, 1.0f); + + vertexBuffer.vertices[2].Set(0.5f, -0.5f); + vertexBuffer.uv[2].Set(1.0f, 0.0f); + + vertexBuffer.vertices[3].Set(0.5f, 0.5f); + vertexBuffer.uv[3].Set(1.0f, 1.0f); + + uint16_t indices[6] = {0, 1, 2, 1, 2, 3}; + + fVtxBuffer = pipeline->fDevice.fMetalDevice->newBuffer(&vertexBuffer, sizeof(plateVertexBuffer), MTL::StorageModeManaged); + fVtxBuffer->retain(); + idxBuffer = pipeline->fDevice.fMetalDevice->newBuffer(&indices, sizeof(uint16_t) * 6, MTL::StorageModeManaged); + } +} + +void plMetalPlateManager::encodeVertexBuffer(MTL::RenderCommandEncoder *encoder) { + encoder->setVertexBuffer(fVtxBuffer, 0, VertexAttributePosition); + encoder->setVertexBuffer(fVtxBuffer, offsetof(plateVertexBuffer, uv), VertexAttributeTexcoord); + + encoder->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, 6, MTL::IndexTypeUInt16, idxBuffer, 0); +} + +void plMetalPlateManager::IReleaseGeometry() +{ + //fVtxBuffer->release(); +} + +void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) { + plMetalPipeline *pipeline = (plMetalPipeline *)pipe; + plPlate* plate = nullptr; + + for (plate = fPlates; plate != nullptr; plate = plate->GetNext()) { + if (plate->IsVisible()) { + pipeline->IDrawPlate(plate); + } + } +} + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h new file mode 100644 index 0000000000..f26df1e49b --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -0,0 +1,74 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. 
+ +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef plMetalPlateManager_hpp +#define plMetalPlateManager_hpp + +#include +#include "plPipeline/plPlates.h" +#include +#include +#include "hsPoint2.h" + +class plMetalPipeline; + +/* Plate manager for the Metal pipeline: owns the render pipeline state, depth state and the shared quad buffers used to draw plates. */ +class plMetalPlateManager : public plPlateManager +{ + friend class plMetalPipeline; +public: + plMetalPlateManager(plMetalPipeline* pipe); + void IDrawToDevice(plPipeline *pipe) override; + void ICreateGeometry(); + void IReleaseGeometry(); + /* Pipeline state created in the constructor from the plate shaders. */ + MTL::RenderPipelineState *fPlateRenderPipelineState; + /* Binds the quad's position/UV ranges on the encoder and issues the indexed draw. */ + void encodeVertexBuffer(MTL::RenderCommandEncoder *encoder); +private: + /* CPU-side layout of the quad: four positions followed by four UVs. */ + struct plateVertexBuffer { + hsPoint2 vertices[4]; + hsPoint2 uv[4]; + }; + MTL::Buffer *fVtxBuffer; + MTL::Buffer *idxBuffer; + MTL::DepthStencilState *fDepthState; +}; + +#endif /* plMetalPlateManager_hpp */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp new file mode 100644 index 0000000000..c0664feb24 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp @@ -0,0 +1,84 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>.
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "HeadSpin.h" + +#include "plMetalShader.h" + +#include "plSurface/plShader.h" + +#include "plMetalPipeline.h" + +plMetalShader::plMetalShader(plShader* owner) +: fOwner(owner), + fPipe(nil) +{ + owner->SetDeviceRef(this); +} + +plMetalShader::~plMetalShader() +{ + fPipe = nil; + + //ISetError(nil); +} + +void plMetalShader::SetOwner(plShader* owner) +{ + if( owner != fOwner ) + { + Release(); + fOwner = owner; + owner->SetDeviceRef(this); + } +} + +/*HRESULT plMetalShader::IOnError(HRESULT hr, const char* errStr) +{ + ISetError(errStr); + + fOwner->Invalidate(); + + hsStatusMessage(errStr); + + return hr; +}*/ + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h new file mode 100644 index 0000000000..6706b095e2 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h @@ -0,0 +1,76 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG 
client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef plDXShader_inc +#define plDXShader_inc + +#include "plMetalDeviceRef.h" +#include +#include + +class plShader; +class plMetalPipeline; + +class plMetalShader : public plMetalDeviceRef +{ +protected: + plShader* fOwner; + //ST::string fErrorString; + plMetalPipeline* fPipe; + MTL::Function* fFunction; + + //HRESULT IOnError(HRESULT hr, const char* errStr); + //void ISetError(const char* errStr) { fErrorString = errStr; } + + //virtual HRESULT ICreate(plDXPipeline* pipe) = 0; + virtual bool ISetConstants(plMetalPipeline* pipe) = 0; // On error, sets error string. + +public: + plMetalShader(plShader* owner); + virtual ~plMetalShader(); + + //ST::string GetErrorString() const { return fErrorString; } + void SetOwner(plShader* owner); + MTL::Function* GetShader(plMetalPipeline* pipe) { return fFunction; }; +}; + +#endif // plDXShader_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp new file mode 100644 index 0000000000..afbc76f963 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp @@ -0,0 +1,81 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#include "HeadSpin.h" +#include "hsWindows.h" + +#include + +#include "plMetalVertexShader.h" + +#include "plSurface/plShader.h" + +#include "plDrawable/plGBufferGroup.h" +#include "plMetalPipeline.h" + +plMetalVertexShader::plMetalVertexShader(plShader* owner) +: plMetalShader(owner) +{ +} + +plMetalVertexShader::~plMetalVertexShader() +{ + Release(); +} + +void plMetalVertexShader::Release() +{ + fPipe = nil; + + //ISetError(nil); +} + +bool plMetalVertexShader::ISetConstants(plMetalPipeline* pipe) +{ + if( fOwner->GetNumConsts() ) + { + float *ptr = (float *)fOwner->GetConstBasePtr(); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, BufferIndexUniforms); + } + + return true; +} + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h new file mode 100644 index 
0000000000..3a096d097a --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h @@ -0,0 +1,66 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#ifndef plMetalVertexShader_inc +#define plMetalVertexShader_inc + +#include "plMetalShader.h" + +class plShader; +class plMetalPipeline; + +class plMetalVertexShader : public plMetalShader +{ +protected: + + +public: + virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. + plMetalVertexShader(plShader* owner); + virtual ~plMetalVertexShader(); + + virtual void Release(); + void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalVertexShader* GetNext() { return (plMetalVertexShader*)fNext; } +}; + +#endif // plMetalVertexShader_inc diff --git a/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h b/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h index e9d97517f1..54f4c37e9f 100644 --- a/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h +++ b/Sources/Plasma/NucleusLib/inc/plCreatableIndex.h @@ -961,6 +961,7 @@ CLASS_INDEX_LIST_START CLASS_INDEX(plLocalizedConfirmationMsg), CLASS_INDEX(plSubtitleMsg), CLASS_INDEX(plDisplayScaleChangedMsg), + CLASS_INDEX(plMetalPipeline), CLASS_INDEX_LIST_END #endif // plCreatableIndex_inc diff --git a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp index 612716f878..93c9900201 100644 --- a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp +++ b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp @@ -43,6 +43,10 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #ifdef USE_EFX # include +#else +# if __APPLE__ +# include +# endif #endif #ifdef EAX_SDK_AVAILABLE # include diff --git a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h index 90256ff985..20818f54e3 100644 --- a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h +++ b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h @@ -48,6 +48,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include #ifdef USE_EFX # include +#else +#if __APPLE__ +# include +#endif #endif #ifdef EAX_SDK_AVAILABLE # include diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp index a88391cb32..e738f1b406 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp @@ -52,6 +52,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "plMessage/plMatRefMsg.h" +#include "hsGDeviceRef.h" + plProfile_CreateTimer("MaterialAnims", "Animation", MaterialAnims); plLayer defaultLayer; @@ -202,6 +204,13 @@ void hsGMaterial::SetLayer(plLayerInterface* layer, int32_t which, bool insert, } } +#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL +void hsGMaterial::SetDeviceRef(hsGDeviceRef* ref) +{ + hsRefCnt_SafeAssign(fDeviceRef, ref); +} +#endif + void hsGMaterial::Write(hsStream* s) { diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h index 08ef49fde0..a7bad93b40 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h @@ -51,6 +51,7 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com class hsScene; class hsResMgr; +class hsGDeviceRef; class hsG3DDevice; class plLayerInterface; class plLayer; @@ -91,6 +92,10 @@ class hsGMaterial : public plSynchedObject uint32_t fLoadFlags; float fLastUpdateTime; + +#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL + hsGDeviceRef* fDeviceRef; +#endif void IClearLayers(); size_t IMakeExtraLayer(); @@ -128,6 +133,11 @@ class hsGMaterial : public plSynchedObject bool IsDynamic() const { return (fCompFlags & kCompDynamic); } bool IsDecal() const { return (fCompFlags & kCompDecal); } bool NeedsBlendChannel() { return (fCompFlags & kCompNeedsBlendChannel); } + +#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL + void SetDeviceRef(hsGDeviceRef* ref); + hsGDeviceRef* GetDeviceRef() const { return fDeviceRef; } +#endif virtual void Read(hsStream* s); virtual void Write(hsStream* s); From 2144231b68a9b002cee3ae05be9fdd2889420136 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 13 Feb 2022 15:26:38 -0800 Subject: [PATCH 002/165] Setting more values to null to prevent crashes --- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 7 +++++++ Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp | 3 +++ 2 files changed, 10 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 2a9a4b415d..4f9fda3a3d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -89,6 +89,8 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plProfile.h" +uint32_t fDbgSetupInitFlags; // HACK temp only + plProfile_CreateCounter("Feed Triangles", "Draw", DrawFeedTriangles); plProfile_CreateCounter("Draw Prim Static", "Draw", DrawPrimStatic); plProfile_CreateMemCounter("Total Texture Size", "Draw", TotalTexSize); @@ -154,6 +156,11 @@ bool plRenderTriListFunc::RenderPrims() const plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), fPipelineState(nullptr), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr) { + fTextureRefList = nullptr; + fVtxBuffRefList = nullptr; + fIdxBuffRefList = nullptr; + fMatRefList = nullptr; + fCurrLayerIdx = 0; fDevice.fPipeline = this; diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp index e738f1b406..e97d5752e3 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp @@ -67,6 +67,9 @@ fLOD(0), fCompFlags(0), fLoadFlags(0), fLastUpdateTime(0) +#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL +,fDeviceRef(nullptr) +#endif { } From 17b4d93754ca7eec8bf79d249659bd9ac2af3176 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 21 Feb 2022 00:28:49 -0800 Subject: [PATCH 003/165] Moving aux spans back onto normal IHandleMaterial I think it was set this way for debugging an issue, but the old code is causing issues itself. 
--- .../FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp | 1 + Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index a024263d41..927efef7b9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -332,6 +332,7 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc if (texture != nullptr && encoder) { plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + hsAssert(offsetFromRootLayer <= 8, "Too many layers requested"); if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer); } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 4f9fda3a3d..6e852321ba 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1221,8 +1221,8 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) size_t pass; for (pass = 0; pass < mRef->GetNumPasses(); pass++) { - //IHandleMaterial(material, pass, &span, vRef); -#if 1 + IHandleMaterial(material, pass, &span, vRef); +#if 0 plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); fCurrLayerIdx = mRef->GetPassIndex(pass); From 002bfc43ef754ffe2bab8b8c8025c81eed7e903b Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 22 Feb 2022 16:25:05 
-0800 Subject: [PATCH 004/165] Fixing normalized co-ordinate identity crisis Treating normalized co-ordinates as if they were full range. Broke some things with inverted alpha. Stuff like the side of the tent in city should render correct now. --- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 9ed3370dd7..f9652815f1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -459,7 +459,7 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { // Local variable to store the color value if (blendFlags & kBlendInvertColor) { - srcSample.rgb = 255 - srcSample.rgb; + srcSample.rgb = 1.0 - srcSample.rgb; } // Leave fCurrColor null if we are blending without texture color @@ -469,7 +469,7 @@ inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t if (blendFlags & kBlendInvertAlpha) { // 1.0 - texture.a - srcSample.a = 255 - srcSample.a; + srcSample.a = 1.0 - srcSample.a; } if (!(blendFlags & kBlendNoTexAlpha)) { @@ -481,7 +481,7 @@ inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { // Local variable to store the color value if (blendFlags & kBlendInvertColor) { - srcSample.rgb = 255 - srcSample.rgb; + srcSample.rgb = 1.0 - srcSample.rgb; } switch (blendFlags & kBlendMask) From feac6b97e1f2b245c47442144242b52627097571 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 22 Feb 2022 19:34:25 -0800 Subject: [PATCH 005/165] Fixing texture indexes Texture is being 
bound to the wrong index. This was causing the cleft to fail to load. --- .../Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h | 2 +- .../FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index b807bc51d0..b131d795aa 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -90,7 +90,7 @@ struct plFragmentShaderLayer { uint uvIndex; uint32_t blendMode; uint32_t miscFlags; - short sampleType; + ushort sampleType; }; struct plMetalFragmentShaderArgumentBuffer { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 927efef7b9..3f0575181e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -137,7 +137,7 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en assert(i - GetPassIndex(pass) >= 0); EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); - IBuildLayerTexture(encoder, i, layer, nullptr); + IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer, nullptr); } encoder->setFragmentBytes(fPassColors[pass], sizeof(simd_float4) * 8, FragmentShaderArgumentAttributeColors); From 286096c547da54e6445d5a54012186fb7a03caf0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 22 Feb 2022 21:43:54 -0800 Subject: [PATCH 006/165] Skipping spans with length 0 Eder Delin has had a 0 span since the resync, which Metal choked on --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 6e852321ba..4542d02d67 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -963,6 +963,10 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, uint32_t vStart, uint32_t vLength, uint32_t iStart, uint32_t iLength) { + if(iLength == 0) { + return; + } + plProfile_BeginTiming(RenderBuff); plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb; From 8a72c860ab1afb2053513d310e7a2a9a8c62c960 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 26 Feb 2022 22:35:23 -0800 Subject: [PATCH 007/165] Initial work on proper clear support Should fix menus and partially fix linking books. Books still aren't drawing their contents, but that doesn't seem to be a renderer issue. The scripts that draw the books may not be working for some reason. 
--- .../pfMetalPipeline/plMetalDevice.cpp | 153 +++++++++++++----- .../pfMetalPipeline/plMetalDevice.h | 9 +- .../pfMetalPipeline/plMetalPipeline.cpp | 5 +- 3 files changed, 121 insertions(+), 46 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 9d9421f29a..a6726c2f60 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -108,61 +108,119 @@ void plMetalDevice::Shutdown() hsAssert(0, "Shutdown not implemented for Metal rendering"); } -void plMetalDevice::SetRenderTarget(plRenderTarget *target) +void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth) { + + if (shouldClearColor) { + fClearColor = clearColor; + } + fShouldClearColor = shouldClearColor; + + if (shouldClearDepth) { + fClearDepth = clearDepth; + } + + if (fCurrentRenderTargetCommandEncoder) { + + printf("Ending render pass, allowing a new one to lazily be created\n"); + + fCurrentRenderTargetCommandEncoder->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; + } + +} + +void plMetalDevice::BeginNewRenderPass() { + + printf("Beginning new render pass\n"); + + //lazilly create the screen render encoder if it does not yet exist + if (!fCurrentOffscreenCommandBuffer && !fCurrentRenderTargetCommandEncoder) { + SetRenderTarget(NULL); + } + + if (fCurrentRenderTargetCommandEncoder) { + //if we have an existing render target, submit it's commands and release it + //if we need to come back to this render target, we can always create a new render + //pass descriptor and submit more commands + fCurrentRenderTargetCommandEncoder->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; + } + + printf("Setting up render pass descriptor\n"); + + 
MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); + renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearColor.x, fClearColor.y, fClearColor.z, fClearColor.w)); + if (fShouldClearColor) { + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); + } else { + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); + } + + if (fCurrentRenderTarget) { + if ( fCurrentRenderTarget->GetZDepth() ) { + plMetalRenderTargetRef* deviceTarget= (plMetalRenderTargetRef *)fCurrentRenderTarget->GetDeviceRef(); + renderPassDescriptor->depthAttachment()->setTexture(deviceTarget->fDepthBuffer); + renderPassDescriptor->depthAttachment()->setClearDepth(fClearDepth); + renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); + renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); + } + fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); + } else { + renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); + renderPassDescriptor->depthAttachment()->setClearDepth(fClearDepth); + renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); + renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); + fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); + } + +} + +void plMetalDevice::SetRenderTarget(plRenderTarget* target) { - if(fCurrentRenderTargetCommandEncoder) { + if( fCurrentRenderTargetCommandEncoder ) { //if we have an existing render target, submit it's commands and release it //if we need to 
come back to this render target, we can always create a new render //pass descriptor and submit more commands fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; - + } + + if( fCurrentOffscreenCommandBuffer ) { fCurrentOffscreenCommandBuffer->enqueue(); fCurrentOffscreenCommandBuffer->commit(); fCurrentOffscreenCommandBuffer->release(); fCurrentOffscreenCommandBuffer = nil; } - if(target) { + fCurrentRenderTarget = target; + + if ( fCurrentRenderTarget && fShouldClearColor == false ) { + // clear if a clear color wasn't already set + fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearColor = true; + fClearDepth = 1.0; + } + + if(fCurrentRenderTarget) { + printf("Setting render target\n"); plMetalRenderTargetRef *deviceTarget= (plMetalRenderTargetRef *)target->GetDeviceRef(); fCurrentOffscreenCommandBuffer = fCommandQueue->commandBuffer(); fCurrentOffscreenCommandBuffer->retain(); fCurrentFragmentOutputTexture = deviceTarget->fTexture; - MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); - renderPassDescriptor->colorAttachments()->object(0)->setTexture(deviceTarget->fTexture); - renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); - renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); - renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearColor.x, fClearColor.y, fClearColor.z, fClearColor.w)); - if(deviceTarget->fDepthBuffer) { - renderPassDescriptor->depthAttachment()->setTexture(deviceTarget->fDepthBuffer); - renderPassDescriptor->depthAttachment()->setClearDepth(1.0); - renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); - renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8; } else { 
fCurrentDepthFormat = MTL::PixelFormatInvalid; } - - fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } else { - if(!fDrawableRenderCommandEncoder) { - MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); - renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture()); - renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); - renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); - renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearColor.x, fClearColor.y, fClearColor.z, fClearColor.w)); - fCurrentFragmentOutputTexture = fCurrentDrawable->texture(); - - renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); - renderPassDescriptor->depthAttachment()->setClearDepth(1.0); - renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); - renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); - fDrawableRenderCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); - } - + printf("Setting drawable target\n"); + fCurrentFragmentOutputTexture = fCurrentDrawable->texture(); fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8; } } @@ -173,10 +231,10 @@ plMetalDevice::plMetalDevice() fCurrentDrawable(nullptr), fCommandQueue(nullptr), fCurrentRenderTargetCommandEncoder(nullptr), - fDrawableRenderCommandEncoder(nullptr), fCurrentDrawableDepthTexture(nullptr), fCurrentFragmentOutputTexture(nullptr), - fCurrentCommandBuffer(nullptr) + fCurrentCommandBuffer(nullptr), + fCurrentRenderTarget(nullptr) { fClearColor = {0.0, 0.0, 0.0, 1.0}; @@ -537,6 +595,10 @@ uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMi void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, 
plMipmap *img, uint slice) { + + if(img->GetKeyName() == "RightDTMap2_dynText") { + printf("hi"); + } if (img->IsCompressed()) { for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { @@ -1072,6 +1134,7 @@ plMetalDevice::plMetalLinkedPipeline* plMetalDevice::pipelineStateFor(const plMe void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) { + printf("Creating new command bufffer\n"); fCurrentCommandBuffer = fCommandQueue->commandBuffer(); fCurrentCommandBuffer->retain(); @@ -1108,9 +1171,10 @@ MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() void plMetalDevice::SubmitCommandBuffer() { - fDrawableRenderCommandEncoder->endEncoding(); - fDrawableRenderCommandEncoder->release(); - fDrawableRenderCommandEncoder = nil; + printf("Submitting command bufffer\n"); + fCurrentRenderTargetCommandEncoder->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->enqueue(); @@ -1122,6 +1186,10 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentDrawable->release(); fCurrentDrawable = nil; + + fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearColor = false; + fClearDepth = 1.0; } MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() @@ -1132,11 +1200,16 @@ MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() return fCurrentRenderTargetCommandEncoder; } - //lazilly create the screen render encoder if it does not yet exist - if(!fDrawableRenderCommandEncoder) { - SetRenderTarget(NULL); + if (!fCurrentRenderTargetCommandEncoder) { + printf("Asked for command encoder, but one not present. 
Creating..."); + BeginNewRenderPass(); + + fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearColor = false; + fClearDepth = 1.0; } - return fDrawableRenderCommandEncoder; + + return fCurrentRenderTargetCommandEncoder; } CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index ccf30eaa22..704a69dadc 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -170,8 +170,7 @@ class plMetalDevice CA::MetalDrawable* GetCurrentDrawable(); ///Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. void SubmitCommandBuffer(); - ///Render encoder to submit draw commands to. This state will automatically reflect the displayable or the current render target, depending on which target has been assigned by Plasma. Will be null if there is no current command buffer. 
- /// + void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); private: //internal struct for tracking which Metal state goes with which set of @@ -235,12 +234,16 @@ class plMetalDevice MTL::CommandBuffer* fCurrentCommandBuffer; MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; - MTL::RenderCommandEncoder* fDrawableRenderCommandEncoder; MTL::Texture* fCurrentDrawableDepthTexture; MTL::Texture* fCurrentFragmentOutputTexture; CA::MetalDrawable* fCurrentDrawable; MTL::PixelFormat fCurrentDepthFormat; simd_float4 fClearColor; + bool fShouldClearColor; + float fClearDepth; + plRenderTarget* fCurrentRenderTarget; + + void BeginNewRenderPass(); }; #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 4542d02d67..0e129145e3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -357,9 +357,8 @@ void plMetalPipeline::ClearRenderTarget(const hsColorRGBA *col, const float *dep if (fView.fRenderState & (kRenderClearColor | kRenderClearDepth)) { hsColorRGBA clearColor = col ? *col : GetClearColor(); float clearDepth = depth ? *depth : fView.GetClearDepth(); - //FIXME: Depth and color are always cleared, do we need to implement handling for not clearing them? - - fDevice.SetClearColor({clearColor.r, clearColor.g, clearColor.b, clearColor.a}); + fDevice.Clear(fView.fRenderState & kRenderClearColor, {clearColor.r, clearColor.g, clearColor.b, clearColor.a}, fView.fRenderState & kRenderClearDepth, 1.0); + fCurrentDepthStencilState = nullptr; } } From c952b7851994cbeb77e309a28dc16cc6d6fb6318 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 28 Feb 2022 22:17:30 -0800 Subject: [PATCH 008/165] Fixes for fast material encoding Was using setFragmentBuffer, not setFragmentBytes. 
Also was doing fast and then slow encoding back to back. --- .../plMetalMaterialShaderRef.cpp | 8 +++-- .../pfMetalPipeline/plMetalPipeline.cpp | 31 ++++++++++--------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 3f0575181e..0e677e0b06 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -140,8 +140,8 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer, nullptr); } - encoder->setFragmentBytes(fPassColors[pass], sizeof(simd_float4) * 8, FragmentShaderArgumentAttributeColors); - encoder->setFragmentBytes(fPassArgumentBuffers[pass], sizeof(plMetalFragmentShaderArgumentBuffer), BufferIndexFragArgBuffer); + encoder->setFragmentBuffer(fPassColors[pass], 0, FragmentShaderArgumentAttributeColors); + encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, BufferIndexFragArgBuffer); } void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) @@ -284,7 +284,9 @@ void plMetalMaterialShaderRef::ILoopOverLayers() MTL::Buffer *argumentBuffer = fDevice->newBuffer(sizeof(plMetalFragmentShaderArgumentBuffer), MTL::ResourceStorageModeManaged); MTL::Buffer *colorBuffer = fDevice->newBuffer(sizeof(simd_float4) * 8, MTL::ResourceStorageModeManaged); - j = IHandleMaterial(iCurrMat, (plMetalFragmentShaderArgumentBuffer *)argumentBuffer->contents(), nullptr, + plMetalFragmentShaderArgumentBuffer *layerBuffer = (plMetalFragmentShaderArgumentBuffer *)argumentBuffer->contents(); + + j = IHandleMaterial(iCurrMat, layerBuffer, nullptr, [](plLayerInterface* layer, 
uint32_t index) { return layer; }, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 0e129145e3..ddad4b8a01 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1523,22 +1523,23 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons if(numActivePiggyBacks==0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) { mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass); - } + } else { - mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, - [&](plLayerInterface* layer, uint32_t index){ - if(index==0) { - layer = IPushOverBaseLayer(layer); - } - layer = IPushOverAllLayer(layer); - return layer; - }, - [&](plLayerInterface* layer, uint32_t index){ - layer = IPopOverAllLayer(layer); - if(index==0) - layer = IPopOverBaseLayer(layer); - return layer; - }); + mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, + [&](plLayerInterface* layer, uint32_t index){ + if(index==0) { + layer = IPushOverBaseLayer(layer); + } + layer = IPushOverAllLayer(layer); + return layer; + }, + [&](plLayerInterface* layer, uint32_t index){ + layer = IPopOverAllLayer(layer); + if(index==0) + layer = IPopOverBaseLayer(layer); + return layer; + }); + } } return true; From bfd45fff96be0ba5d961a134915462ea8200a8ed Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 18 Mar 2022 18:38:10 -0700 Subject: [PATCH 009/165] Integrating the optimized M1 Metal renderer Also fixing several rendering bugs: - Single layer color materials now render correctly - Fixed lighting issues on moving surfaces - Fixed shader compilation crash on Intel HD Metal drivers by removing inline notation from shader functions. 
No performance decline was observed. --- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 2 + .../ShaderSrc/FixedPipelineShaders.metal | 370 +++++++------ .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 66 ++- .../pfMetalPipeline/ShaderSrc/ShaderVertex.h | 2 +- .../pfMetalPipeline/plMetalDevice.cpp | 518 +++++------------- .../pfMetalPipeline/plMetalDevice.h | 87 ++- .../plMetalMaterialShaderRef.cpp | 74 ++- .../plMetalMaterialShaderRef.h | 3 + .../pfMetalPipeline/plMetalPipeline.cpp | 226 +++----- .../pfMetalPipeline/plMetalPipeline.h | 2 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 426 ++++++++++++++ .../pfMetalPipeline/plMetalPipelineState.h | 218 ++++++++ .../Plasma/PubUtilLib/plGImage/plMipmap.cpp | 3 + 13 files changed, 1192 insertions(+), 805 deletions(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index bd336a210a..d8af9bfcf4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -14,6 +14,7 @@ set(pfMetalPipeline_SOURCES plMetalDeviceRefs.cpp plMetalMaterialShaderRef.cpp plMetalPipeline.cpp + plMetalPipelineState.cpp plMetalPlateManager.cpp plMetalShader.cpp plMetalFragmentShader.cpp @@ -25,6 +26,7 @@ set(pfMetalPipeline_HEADERS plMetalDeviceRef.h plMetalMaterialShaderRef.h plMetalPipeline.h + plMetalPipelineState.h plMetalPlateManager.h plMetalShader.h plMetalFragmentShader.h diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index f9652815f1..3cab28d062 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -54,7 +54,7 @@ using namespace metal; //copying this direction from hsGMatState because I am a horrible person but we can't import the header here in since it includes a lot of class stuff. //FIXME: Come up with something better. -enum hsGMatMiscFlags { +enum hsGMatMiscFlags: uint32_t { kMiscWireFrame = 0x1, // dev (running out of bits) kMiscDrawMeshOutlines = 0x2, // dev, currently unimplemented kMiscTwoSided = 0x4, // view,dev @@ -92,7 +92,7 @@ enum hsGMatMiscFlags { kAllMiscFlags = 0xffffffff }; -enum hsGMatBlendFlags { +enum hsGMatBlendFlags: uint32_t { kBlendTest = 0x1, // dev // Rest of blends are mutually exclusive kBlendAlpha = 0x2, // dev @@ -133,7 +133,7 @@ enum hsGMatBlendFlags { kBlendAlphaPremultiplied = 0x8000000 }; -enum plUVWSrcModifiers { +enum plUVWSrcModifiers: uint32_t { kUVWPassThru = 0x00000000, kUVWIdxMask = 0x0000ffff, kUVWNormal = 0x00010000, @@ -142,15 +142,62 @@ enum plUVWSrcModifiers { }; using namespace metal; - + +constant uint8_t sourceType1 [[ function_constant(FunctionConstantSources + 0) ]]; +constant uint8_t sourceType2 [[ function_constant(FunctionConstantSources + 1) ]]; +constant uint8_t sourceType3 [[ function_constant(FunctionConstantSources + 2) ]]; +constant uint8_t sourceType4 [[ function_constant(FunctionConstantSources + 3) ]]; +constant uint8_t sourceType5 [[ function_constant(FunctionConstantSources + 4) ]]; +constant uint8_t sourceType6 [[ function_constant(FunctionConstantSources + 5) ]]; +constant uint8_t sourceType7 [[ function_constant(FunctionConstantSources + 6) ]]; +constant uint8_t sourceType8 [[ function_constant(FunctionConstantSources + 7) ]]; + +constant uint32_t blendModes1 [[ function_constant(FunctionConstantBlendModes + 0) ]]; +constant uint32_t blendModes2 [[ function_constant(FunctionConstantBlendModes + 1) ]]; +constant uint32_t blendModes3 [[ function_constant(FunctionConstantBlendModes + 2) ]]; +constant uint32_t 
blendModes4 [[ function_constant(FunctionConstantBlendModes + 3) ]]; +constant uint32_t blendModes5 [[ function_constant(FunctionConstantBlendModes + 4) ]]; +constant uint32_t blendModes6 [[ function_constant(FunctionConstantBlendModes + 5) ]]; +constant uint32_t blendModes7 [[ function_constant(FunctionConstantBlendModes + 6) ]]; +constant uint32_t blendModes8 [[ function_constant(FunctionConstantBlendModes + 7) ]]; + +constant uint32_t miscFlags1 [[ function_constant(FunctionConstantLayerFlags + 0) ]]; +constant uint32_t miscFlags2 [[ function_constant(FunctionConstantLayerFlags + 1) ]]; +constant uint32_t miscFlags3 [[ function_constant(FunctionConstantLayerFlags + 2) ]]; +constant uint32_t miscFlags4 [[ function_constant(FunctionConstantLayerFlags + 3) ]]; +constant uint32_t miscFlags5 [[ function_constant(FunctionConstantLayerFlags + 4) ]]; +constant uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFlags + 5) ]]; +constant uint32_t miscFlags7 [[ function_constant(FunctionConstantLayerFlags + 6) ]]; +constant uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; + +#define MAX_BLEND_PASSES 8 +constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; +constant const uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; +constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; +constant uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); + typedef struct { - array, 8> textures [[ texture(FragmentShaderArgumentAttributeTextures) ]]; - array, 8> cubicTextures [[ texture(FragmentShaderArgumentAttributeCubicTextures) 
]]; - constant float4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; - constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; + texture2d textures [[ texture(FragmentShaderArgumentAttributeTextures), function_constant(hasLayer1) ]]; + texture2d texture2 [[ texture(FragmentShaderArgumentAttributeTextures + 1), function_constant(hasLayer2) ]]; + texture2d texture3 [[ texture(FragmentShaderArgumentAttributeTextures + 2), function_constant(hasLayer3) ]]; + texture2d texture4 [[ texture(FragmentShaderArgumentAttributeTextures + 3), function_constant(hasLayer4) ]]; + texture2d texture5 [[ texture(FragmentShaderArgumentAttributeTextures + 4), function_constant(hasLayer5) ]]; + texture2d texture6 [[ texture(FragmentShaderArgumentAttributeTextures + 5), function_constant(hasLayer6) ]]; + texture2d texture7 [[ texture(FragmentShaderArgumentAttributeTextures + 6), function_constant(hasLayer7) ]]; + texture2d texture8 [[ texture(FragmentShaderArgumentAttributeTextures + 7), function_constant(hasLayer8) ]]; + texturecube cubicTextures [[ texture(FragmentShaderArgumentAttributeCubicTextures), function_constant(hasLayer1) ]]; + texturecube cubicTexture2 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1), function_constant(hasLayer2) ]]; + texturecube cubicTexture3 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 2), function_constant(hasLayer3) ]]; + texturecube cubicTexture4 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3), function_constant(hasLayer4) ]]; + texturecube cubicTexture5 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 4), function_constant(hasLayer5) ]]; + texturecube cubicTexture6 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 5), function_constant(hasLayer6) ]]; + texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasLayer7) ]]; + texturecube cubicTexture8 [[ 
texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; + const constant half4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; + const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; } FragmentShaderArguments; -inline float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); +float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); typedef struct { @@ -168,6 +215,35 @@ typedef struct half4 fogColor; //float4 vCamNormal; } ColorInOut; + + +typedef struct +{ + float4 position [[position]]; + float3 texCoord1; +} ShadowCasterInOut; + +constant constexpr sampler colorSamplers[] = { + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::clamp_to_edge, + t_address::repeat), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + s_address::repeat, + t_address::clamp_to_edge), + sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_edge), + +}; vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], @@ -175,61 +251,61 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], { ColorInOut out; //we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. 
- float4 inColor = float4(in.color.b, in.color.g, in.color.r, in.color.a) / float4(255.0f); + const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.0f); - float4 MAmbient = mix(inColor, uniforms.ambientCol, uniforms.ambientSrc); - float4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); - float4 MEmissive = mix(inColor, uniforms.emissiveCol, uniforms.emissiveSrc); - float4 MSpecular = mix(inColor, uniforms.specularCol, uniforms.specularSrc); + const half4 MAmbient = half4(mix(inColor, uniforms.ambientCol, uniforms.ambientSrc)); + const half4 MDiffuse = half4(mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc)); + const half4 MEmissive = half4(mix(inColor, uniforms.emissiveCol, uniforms.emissiveSrc)); + //const half4 MSpecular = half4(mix(inColor, uniforms.specularCol, uniforms.specularSrc)); - float4 LAmbient = float4(0.0, 0.0, 0.0, 0.0); - float4 LDiffuse = float4(0.0, 0.0, 0.0, 0.0); + half4 LAmbient = half4(0.0, 0.0, 0.0, 0.0); + half4 LDiffuse = half4(0.0, 0.0, 0.0, 0.0); - float3 Ndirection = normalize(uniforms.worldToLocalMatrix * float4(in.normal, 0.0)).xyz; + float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz; - for (uint i = 0; i < 8; i++) { - plMetalShaderLightSource lightSource = uniforms.lampSources[i]; - if(lightSource.scale == 0) + for (size_t i = 0; i < 8; i++) { + constant plMetalShaderLightSource *lightSource = &uniforms.lampSources[i]; + if(lightSource->scale == 0.0h) continue; float attenuation; float3 direction; - if (lightSource.position.w == 0.0) { + if (lightSource->position.w == 0.0) { // Directional Light with no attenuation - direction = -(lightSource.direction).xyz; + direction = -(lightSource->direction).xyz; attenuation = 1.0; } else { // Omni Light in all directions - float3 v2l = lightSource.position.xyz - float3(uniforms.localToWorldMatrix * float4(in.position, 1.0)); - float distance = length(v2l); + const float3 v2l = 
lightSource->position.xyz - float3(uniforms.localToWorldMatrix * float4(in.position, 1.0)); + const float distance = length(v2l); direction = normalize(v2l); - attenuation = 1.0 / (lightSource.constAtten + lightSource.linAtten * distance + lightSource.quadAtten * pow(distance, 2.0)); + attenuation = 1.0 / (lightSource->constAtten + lightSource->linAtten * distance + lightSource->quadAtten * pow(distance, 2.0)); if (uniforms.lampSources[i].spotProps.x > 0.0) { // Spot Light with cone falloff - float a = dot(direction.xyz, normalize(-lightSource.direction).xyz); - float theta = lightSource.spotProps.y; - float phi = lightSource.spotProps.z; - float result = pow((a - phi) / (theta - phi), lightSource.spotProps.x); + const float a = dot(direction.xyz, normalize(-lightSource->direction).xyz); + const float theta = lightSource->spotProps.y; + const float phi = lightSource->spotProps.z; + const float result = pow((a - phi) / (theta - phi), lightSource->spotProps.x); attenuation *= clamp(result, 0.0, 1.0); } } - LAmbient.rgb = LAmbient.rgb + attenuation * (uniforms.lampSources[i].ambient.rgb * uniforms.lampSources[i].scale); + LAmbient.rgb = LAmbient.rgb + half3(attenuation * (uniforms.lampSources[i].ambient.rgb * uniforms.lampSources[i].scale)); float3 dotResult = dot(Ndirection, direction); - LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (uniforms.lampSources[i].diffuse.rgb * uniforms.lampSources[i].scale) * max(0.0, dotResult) * attenuation; + LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (uniforms.lampSources[i].diffuse.rgb * uniforms.lampSources[i].scale) * half3(max(0.0, dotResult) * attenuation); } - float4 ambient = clamp(float4(MAmbient) * (uniforms.globalAmb + LAmbient), 0.0, 1.0); - float4 diffuse = clamp(LDiffuse, 0.0, 1.0); - float4 material = clamp(ambient + diffuse + float4(MEmissive), 0.0, 1.0); + const half4 ambient = clamp((MAmbient) * (half4(uniforms.globalAmb) + LAmbient), 0.0, 1.0); + const half4 diffuse = clamp(LDiffuse, 0.0, 1.0); + const half4 
material = clamp(ambient + diffuse + half4(MEmissive), 0.0, 1.0); - out.vtxColor = half4(float4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a))); + out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); - float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); //Fog @@ -238,41 +314,41 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], out.fogColor.a = exp(-pow(uniforms.fogValues.y * length(vCamPosition), uniforms.fogValues.x)); } else { if (uniforms.fogValues.y > 0.0) { - float start = uniforms.fogValues.x; - float end = uniforms.fogValues.y; + const float start = uniforms.fogValues.x; + const float end = uniforms.fogValues.y; out.fogColor.a = (end - length(vCamPosition.xyz)) / (end - start); } } - out.fogColor.rgb = half3(uniforms.fogColor); + out.fogColor.rgb = uniforms.fogColor; - float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); + const float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); if(hasLayer1) - out.texCoord1 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[0].transform, uniforms.uvTransforms[0].UVWSrc, uniforms.uvTransforms[0].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord1 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[0].transform, uniforms.uvTransforms[0].UVWSrc, miscFlags1, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer2) - out.texCoord2 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[1].transform, uniforms.uvTransforms[1].UVWSrc, uniforms.uvTransforms[1].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, 
uniforms.projectionMatrix); + out.texCoord2 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[1].transform, uniforms.uvTransforms[1].UVWSrc, miscFlags2, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer3) - out.texCoord3 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[2].transform, uniforms.uvTransforms[2].UVWSrc, uniforms.uvTransforms[2].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord3 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[2].transform, uniforms.uvTransforms[2].UVWSrc, miscFlags3, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer4) - out.texCoord4 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[3].transform, uniforms.uvTransforms[3].UVWSrc, uniforms.uvTransforms[3].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord4 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[3].transform, uniforms.uvTransforms[3].UVWSrc, miscFlags4, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer5) - out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[4].transform, uniforms.uvTransforms[4].UVWSrc, uniforms.uvTransforms[4].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[4].transform, uniforms.uvTransforms[4].UVWSrc, miscFlags5, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer6) - out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[5].transform, uniforms.uvTransforms[5].UVWSrc, uniforms.uvTransforms[5].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[5].transform, uniforms.uvTransforms[5].UVWSrc, miscFlags6, normal, vCamPosition, 
uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer7) - out.texCoord7 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[6].transform, uniforms.uvTransforms[6].UVWSrc, uniforms.uvTransforms[6].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord7 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[6].transform, uniforms.uvTransforms[6].UVWSrc, miscFlags7, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); if(hasLayer8) - out.texCoord8 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[7].transform, uniforms.uvTransforms[7].UVWSrc, uniforms.uvTransforms[7].flags, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord8 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[7].transform, uniforms.uvTransforms[7].UVWSrc, miscFlags8, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); out.position = uniforms.projectionMatrix * vCamPosition; return out; } -inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags); -inline void blend(half4 srcSample, thread half4 &destSample, uint32_t blendFlags); +void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags); +void blend(half4 srcSample, thread half4 &destSample, uint32_t blendFlags); -inline float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix) { +float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix) { //Note: If we want to require newer versions of Metal/newer hardware we could pass function pointers instead of doing these ifs. 
if (flags & (kMiscUseReflectionXform | kMiscUseRefractionXform)) { matrix = camToWorldMatrix; @@ -373,56 +449,29 @@ inline float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, c break; default: { - int index = UVWSrc & 0x0f; + const int index = UVWSrc & 0x0F; sampleCoord = matrix * float4(texCoords[index], 1.0); } break; } return sampleCoord.xyz; } - -half4 blendLayer(plFragmentShaderLayer layer, float3 sampleCoord, half4 color, texture2d texture, thread texturecube *cubicTexture) { - - constexpr sampler colorSamplers[] = { - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::clamp_to_edge, - t_address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::repeat, - t_address::clamp_to_edge), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::clamp_to_edge), - - }; - - ushort passType = layer.passType; + +half4 sampleLayer(uint8_t passType, uint8_t sampleType, uint32_t miscFlags, float3 sampleCoord, const thread half4 &color, const thread texture2d &texture, const thread texturecube &cubicTexture) { if(passType == PassTypeColor) { return color; } else { - if (layer.miscFlags & kMiscPerspProjection) { - sampleCoord.xy = sampleCoord.xy / sampleCoord.z; + if (miscFlags & kMiscPerspProjection) { + sampleCoord.xy /= sampleCoord.z; } - int colorSamplerIndex = layer.sampleType; //do the actual sample if(passType == PassTypeTexture) { - texture2d colorMap = texture; - return colorMap.sample(colorSamplers[colorSamplerIndex], sampleCoord.xy); + return texture.sample(colorSamplers[sampleType], sampleCoord.xy); } else if(passType == PassTypeCubicTexture) { - thread texturecube *colorMap = cubicTexture; - return colorMap->sample(colorSamplers[colorSamplerIndex], sampleCoord.xyz); + return cubicTexture.sample(colorSamplers[sampleType], sampleCoord.xyz); } else 
{ return half4(0); } @@ -430,36 +479,52 @@ half4 blendLayer(plFragmentShaderLayer layer, float3 sampleCoord, half4 color, } fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - FragmentShaderArguments fragmentShaderArgs) + const FragmentShaderArguments fragmentShaderArgs) { + half4 currentColor = in.vtxColor; - uint i = 0; - for(i=i; i< num_layers; i++) { - plFragmentShaderLayer layer = fragmentShaderArgs.bufferedUniforms->layers[i]; + /* + SPECIAL PLASMA RULE: + If there is only one layer, and that layer is not a texture, + skip straight to the vertex color and return it + */ + if (!(passCount==1 && sourceTypes[0] == PassTypeColor)) { - thread texturecube* cubicTexture = &(fragmentShaderArgs.cubicTextures[i]); - half4 color = blendLayer(layer, (&in.texCoord1)[i], currentColor, fragmentShaderArgs.textures[i], cubicTexture); - if(i==0) { - blendFirst(color, currentColor, layer.blendMode); - } else { - blend(color, currentColor, layer.blendMode); + half4 color; + + /* + Note: For loop should be unrolled by the compiler, but it is very sensitive. + Always use size_t for the loop interator type. 
+ */ + for(size_t layer=0; layerlayers[layer].sampleType, miscFlags[layer], sampleCoord, half4(in.vtxColor), (&fragmentShaderArgs.textures)[layer], (&fragmentShaderArgs.cubicTextures)[layer]); + + if(layer==0) { + blendFirst(color, currentColor, blendModes[layer]); + } else { + blend(color, currentColor, blendModes[layer]); + } } + + currentColor = half4(in.vtxColor.rgb, 1.0) * currentColor; } - currentColor = half4(in.vtxColor.rgb, 1.0) * currentColor; - currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb * currentColor.a, 1.0f - clamp((float)in.fogColor.a, 0.0f, 1.0f)); + currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb, 1.0f - clamp((float)in.fogColor.a, 0.0f, 1.0f)); if (currentColor.a < fragmentShaderArgs.bufferedUniforms->alphaThreshold) { discard_fragment(); } return currentColor; } -inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { +void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { + // Local variable to store the color value if (blendFlags & kBlendInvertColor) { - srcSample.rgb = 1.0 - srcSample.rgb; + srcSample.rgb = 1.0h - srcSample.rgb; } // Leave fCurrColor null if we are blending without texture color @@ -469,7 +534,7 @@ inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t if (blendFlags & kBlendInvertAlpha) { // 1.0 - texture.a - srcSample.a = 1.0 - srcSample.a; + srcSample.a = 1.0h - srcSample.a; } if (!(blendFlags & kBlendNoTexAlpha)) { @@ -478,10 +543,10 @@ inline void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t } } -inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { +void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { // Local variable to store the color value if (blendFlags & kBlendInvertColor) { - srcSample.rgb = 1.0 - srcSample.rgb; + srcSample.rgb = 1.0h - srcSample.rgb; } switch (blendFlags & kBlendMask) @@ -505,7 +570,7 @@ 
inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blen if (blendFlags & kBlendInvertAlpha) { // 1.0 - texture.a - srcSample.a = 1.0 - srcSample.a; + srcSample.a = 1.0h - srcSample.a; } else { // texture.a srcSample.a = srcSample.a; @@ -546,7 +611,7 @@ inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blen case kBlendAddSigned: { // color = color + prev - 0.5 - destSample.rgb = srcSample.rgb + destSample.rgb - 0.5; + destSample.rgb = srcSample.rgb + destSample.rgb - 0.5h; break; } @@ -554,7 +619,7 @@ inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blen { // color = (color + prev - 0.5) << 1 // Note: using CALL here for multiplication to ensure parentheses - destSample.rgb = 2 * (srcSample.rgb + destSample.rgb - 0.5); + destSample.rgb = 2.0h * (srcSample.rgb + destSample.rgb - 0.5h); break; } @@ -567,93 +632,70 @@ inline void blend(half4 srcSample, thread half4 &destSample, const uint32_t blen } } -fragment float4 shadowFragmentShader(ColorInOut in [[stage_in]], +vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - texture2d colorMap [[ texture(0) ]]) + uint v_id [[vertex_id]]) { - constexpr sampler colorSamplers[] = { - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::clamp_to_edge, - t_address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::repeat, - t_address::clamp_to_edge), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::clamp_to_edge), - - }; + ShadowCasterInOut out; + + const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + //out.texCoord1 = (uniforms.uvTransforms[0].transform * vCamPosition).xyz; + out.texCoord1 = 
sampleLocation(&in.texCoord1, uniforms.uvTransforms[0].transform, uniforms.uvTransforms[0].UVWSrc, 0, float4(0.0), vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + + out.position = uniforms.projectionMatrix * vCamPosition; + + return out; +} + +fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]], + texture2d colorMap [[ texture(0) ]]) +{ //D3DTTFF_COUNT3, D3DTSS_TCI_CAMERASPACEPOSITION - ushort4 currentColor = colorMap.sample(colorSamplers[3], in.texCoord1.xy); + short currentAlpha = colorMap.sample(colorSamplers[3], float2(in.texCoord1.xy)).a; - return float4(1.0, 1.0, 1.0, float(currentColor.a)/255.0f); + return half4(1.0h, 1.0h, 1.0h, half(currentAlpha)/255.0h); } -fragment float4 shadowCastFragmentShader(ColorInOut in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], +fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], texture2d texture [[ texture(16) ]], texture2d LUT [[ texture(17) ]], constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(BufferIndexShadowCastFragArgBuffer) ]], FragmentShaderArguments layers, constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowAlphaSrc) ]]) { - - constexpr sampler colorSamplers[] = { - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::clamp_to_edge, - t_address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::repeat, - t_address::clamp_to_edge), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::clamp_to_edge), - - }; - float3 sampleCoords = in.texCoord1; if(fragmentUniforms.pointLightCast) { sampleCoords.xy /= sampleCoords.z; } - float4 currentColor = float4(texture.sample(colorSamplers[3], sampleCoords.xy)); - currentColor.rgb *= float3(in.vtxColor.rgb); + half4 currentColor = 
half4(texture.sample(colorSamplers[3], sampleCoords.xy)); + currentColor.rgb *= in.vtxColor.rgb; - float3 LUTCoords = in.texCoord2; - float4 LUTColor = float4(LUT.sample(colorSamplers[3], LUTCoords.xy))/255.0f; + const float2 LUTCoords = in.texCoord2.xy; + const half4 LUTColor = half4(LUT.sample(colorSamplers[3], LUTCoords))/255.0h; currentColor.rgb = (1.0 - LUTColor.rgb) * currentColor.rgb; currentColor.a = LUTColor.a - currentColor.a; - if(alphaSrc != -1) { - half4 layerColor = blendLayer(layers.bufferedUniforms->layers[alphaSrc], in.texCoord3, half4(layers.colors[alphaSrc]), layers.textures[alphaSrc], nullptr); + //only possible alpha sources are layers 0 or 1 + if(alphaSrc == 0) { + + half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[0].sampleType, miscFlags[2], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0]); + + currentColor.rgb *= layerColor.a; + currentColor.rgb *= in.vtxColor.a; + } else if(alphaSrc == 1) { + + half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[1].sampleType, miscFlags[2], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], (&layers.cubicTextures)[1]); currentColor.rgb *= layerColor.a; - currentColor.rgb *= uniforms.diffuseCol.a; + currentColor.rgb *= in.vtxColor.a; } //alpha blend goes here - if(currentColor.a <= 0.0) + if(currentColor.a <= 0.0h) discard_fragment(); return currentColor; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index b131d795aa..8a9a64fbf0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -45,6 +45,13 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include +#ifndef __METAL_VERSION__ +typedef _Float16 half; +typedef __attribute__((__ext_vector_type__(2))) half half2; +typedef __attribute__((__ext_vector_type__(3))) half half3; +typedef __attribute__((__ext_vector_type__(4))) half half4; +#endif + enum plMetalShaderArgumentIndex { //Texture is a legacy argument for the simpler plate shader @@ -75,10 +82,13 @@ enum plMetalFragmentShaderUniform enum plMetalFunctionConstant { FunctionConstantNumUVs = 0, - FunctionConstantNumLayers = 1 + FunctionConstantNumLayers = 1, + FunctionConstantSources = 2, + FunctionConstantBlendModes = 10, + FunctionConstantLayerFlags = 18 }; -enum plMetalLayerPassType +enum plMetalLayerPassType: uint8_t { PassTypeTexture = 1, PassTypeCubicTexture = 2, @@ -86,16 +96,12 @@ enum plMetalLayerPassType }; struct plFragmentShaderLayer { - ushort passType; - uint uvIndex; - uint32_t blendMode; - uint32_t miscFlags; - ushort sampleType; + uint8_t sampleType; }; struct plMetalFragmentShaderArgumentBuffer { - ushort layerCount; - float alphaThreshold; + uint8_t layerCount; + __fp16 alphaThreshold; plFragmentShaderLayer layers[8]; }; @@ -112,21 +118,21 @@ enum plMetalFragmentShaderTextures { struct plMetalShaderLightSource { simd::float4 position; - simd::float4 ambient; - simd::float4 diffuse; - simd::float4 specular; + half4 ambient; + half4 diffuse; + half4 specular; simd::float3 direction; simd::float4 spotProps; // (falloff, theta, phi) - float constAtten; - float linAtten; - float quadAtten; - float scale; + __fp16 constAtten; + __fp16 linAtten; + __fp16 quadAtten; + __fp16 scale; }; typedef struct { - uint UVWSrc; - uint flags; + uint32_t UVWSrc; + uint32_t flags; matrix_float4x4 transform; } UVOutDescriptor; @@ -140,24 +146,24 @@ typedef struct matrix_float4x4 worldToCameraMatrix; //lighting - simd::float4 globalAmb; - simd::float4 ambientCol; - float ambientSrc; - simd::float4 diffuseCol; - float diffuseSrc; - simd::float4 emissiveCol; - float emissiveSrc; - 
simd::float4 specularCol; - float specularSrc; + half4 globalAmb; + half4 ambientCol; + uint8_t ambientSrc; + half4 diffuseCol; + uint8_t diffuseSrc; + half4 emissiveCol; + uint8_t emissiveSrc; + half4 specularCol; + uint8_t specularSrc; bool invVtxAlpha; - uint fogExponential; + uint8_t fogExponential; simd::float2 fogValues; - simd::float3 fogColor; + half3 fogColor; plMetalShaderLightSource lampSources[8]; - uint numUVSrcs; + uint8_t numUVSrcs; UVOutDescriptor uvTransforms[8]; } VertexUniforms; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h index 8859dc60f3..1e6f879734 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h @@ -51,7 +51,7 @@ constant bool hasTexture6 = num_uvs > 5; constant bool hasTexture7 = num_uvs > 6; constant bool hasTexture8 = num_uvs > 7; -constant ushort num_layers [[ function_constant(FunctionConstantNumLayers) ]]; +constant uint8_t num_layers [[ function_constant(FunctionConstantNumLayers) ]]; constant bool hasLayer1 = num_layers > 0; constant bool hasLayer2 = num_layers > 1; constant bool hasLayer3 = num_layers > 2; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index a6726c2f60..7753787d56 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -57,6 +57,8 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plGImage/plCubicEnvironmap.h" #include "plPipeline/plRenderTarget.h" +#include "plMetalPipelineState.h" + matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder) { if (src.fFlags & hsMatrix44::kIsIdent) @@ -120,9 +122,6 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh } if (fCurrentRenderTargetCommandEncoder) { - - printf("Ending render pass, allowing a new one to lazily be created\n"); - fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; @@ -132,7 +131,7 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh void plMetalDevice::BeginNewRenderPass() { - printf("Beginning new render pass\n"); + //printf("Beginning new render pass\n"); //lazilly create the screen render encoder if it does not yet exist if (!fCurrentOffscreenCommandBuffer && !fCurrentRenderTargetCommandEncoder) { @@ -148,8 +147,6 @@ void plMetalDevice::BeginNewRenderPass() { fCurrentRenderTargetCommandEncoder = nil; } - printf("Setting up render pass descriptor\n"); - MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); @@ -207,7 +204,6 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) } if(fCurrentRenderTarget) { - printf("Setting render target\n"); plMetalRenderTargetRef *deviceTarget= (plMetalRenderTargetRef *)target->GetDeviceRef(); fCurrentOffscreenCommandBuffer = fCommandQueue->commandBuffer(); fCurrentOffscreenCommandBuffer->retain(); @@ -219,7 +215,6 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) fCurrentDepthFormat = MTL::PixelFormatInvalid; } } else { - printf("Setting drawable target\n"); 
fCurrentFragmentOutputTexture = fCurrentDrawable->texture(); fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8; } @@ -234,7 +229,9 @@ plMetalDevice::plMetalDevice() fCurrentDrawableDepthTexture(nullptr), fCurrentFragmentOutputTexture(nullptr), fCurrentCommandBuffer(nullptr), - fCurrentRenderTarget(nullptr) + fCurrentOffscreenCommandBuffer(nullptr), + fCurrentRenderTarget(nullptr), + fNewPipelineStateMap() { fClearColor = {0.0, 0.0, 0.0, 1.0}; @@ -375,7 +372,8 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr return; } - ref->SetBuffer(fMetalDevice->newBuffer(size, MTL::StorageModeManaged)->autorelease()); + MTL::Buffer* metalBuffer = fMetalDevice->newBuffer(size, MTL::StorageModeManaged); + ref->SetBuffer(metalBuffer); uint8_t* buffer = (uint8_t*) ref->GetBuffer()->contents(); if (ref->fData) @@ -435,6 +433,8 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr hsAssert((ptr - buffer) == size, "Didn't fill the buffer?"); } + + metalBuffer->release(); /// Unlock and clean up ref->SetRebuiltSinceUsed(true); @@ -514,8 +514,9 @@ void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef *iRef, plGB iRef->PrepareForWrite(); MTL::Buffer* indexBuffer = iRef->GetBuffer(); if(!indexBuffer || indexBuffer->length() < size) { - indexBuffer = fMetalDevice->newBuffer(size, MTL::ResourceStorageModeManaged)->autorelease(); + indexBuffer = fMetalDevice->newBuffer(size, MTL::ResourceStorageModeManaged); iRef->SetBuffer(indexBuffer); + indexBuffer->release(); } memcpy(indexBuffer->contents(), owner->GetIndexBufferData(idx), size); @@ -595,10 +596,6 @@ uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMi void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice) { - - if(img->GetKeyName() == "RightDTMap2_dynText") { - printf("hi"); - } if (img->IsCompressed()) { for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { @@ -652,14 +649,18 @@ void 
plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *i tRef->SetDirty(false); } -void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef *tRef, plLayerInterface *layer, plMipmap *img) +void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* img) { if (!img->GetImage()) { return; } + if(tRef->fTexture) { + tRef->fTexture->release(); + } + tRef->fLevels = img->GetNumLevels() - 1; - if(!tRef->fTexture) { + //if(!tRef->fTexture) { ConfigureAllowedLevels(tRef, img); //texture doesn't exist yet, create it bool supportsMipMap = tRef->fLevels; @@ -675,11 +676,13 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef *tRef, plLayerInter descriptor->setMipmapLevelCount(tRef->fLevels + 1); } tRef->fTexture = fMetalDevice->newTexture(descriptor); - } + //} PopulateTexture( tRef, img, 0); + + tRef->SetDirty(false); } -void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plLayerInterface *layer, plCubicEnvironmap *img) +void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plCubicEnvironmap *img) { MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::textureCubeDescriptor(tRef->fFormat, img->GetFace(0)->GetWidth(), tRef->fLevels != 0); @@ -705,6 +708,8 @@ void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plLayer for (size_t i = 0; i < 6; i++) { PopulateTexture( tRef, img->GetFace(i), kFaceMapping[i]); } + + tRef->SetDirty(false); } void plMetalDevice::SetProjectionMatrix(const hsMatrix44& src) @@ -730,376 +735,104 @@ void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder) hsMatrix2SIMD(inv, &fMatrixW2L, swapOrder); } -plMetalDevice::plPipelineStateAtrributes::plPipelineStateAtrributes(const plMetalVertexBufferRef * vRef, const uint32_t blendFlags, const MTL::PixelFormat outputPixelFormat, const MTL::PixelFormat outputDepthFormat, const plShaderID::ID vertexShaderID, const plShaderID::ID fragmentShaderID, const int forShadows, 
const uint numLayers) -{ - numUVs = plGBufferGroup::CalcNumUVs(vRef->fFormat); - numWeights = (vRef->fFormat & plGBufferGroup::kSkinWeightMask) >> 4; - hasSkinIndices = (vRef->fFormat & plGBufferGroup::kSkinIndices); - outputFormat = outputPixelFormat; - this->depthFormat = outputDepthFormat; - this->blendFlags = blendFlags; - this->vertexShaderID = vertexShaderID; - this->fragmentShaderID = fragmentShaderID; - this->forShadows = forShadows; - this->numLayers = numLayers; -} - - -std::condition_variable * plMetalDevice::prewarmPipelineStateFor(plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, bool forShadows) -{ - plPipelineStateAtrributes attributes = plPipelineStateAtrributes(vRef, blendFlags, fCurrentFragmentOutputTexture->pixelFormat(), fCurrentDepthFormat, vertexShaderID, fragmentShaderID, forShadows, numLayers); - //only render thread is allowed to prewarm, no race conditions around - //fConditionMap creation - if(!fPipelineStateMap[attributes] && fConditionMap[attributes]) { - std::condition_variable *condOut; - StartRenderPipelineBuild(attributes, &condOut); - return condOut; - } - return nullptr; -} - -void plMetalDevice::StartRenderPipelineBuild(plPipelineStateAtrributes &attributes, std::condition_variable **condOut) +void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) { - /* - Shader building requires both knowledge of the vertex buffer layout and the fragment shader details. For now it lives here. The caching and threading mechanism should be factored out so that OpenGL can share them. Vector buffer dependencies should be factored out so we only need material details. That also means we can use the threading to create these earlier in a render pass. 
- */ - int vertOffset = 0; - int skinWeightOffset = vertOffset + (sizeof(float) * 3); - if(attributes.hasSkinIndices) { - skinWeightOffset += sizeof(uint32_t); - } - int normOffset = skinWeightOffset + (sizeof(float) * attributes.numWeights); - int colorOffset = normOffset + (sizeof(float) * 3); - int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); - int stride = baseUvOffset + (sizeof(float) * 3 * attributes.numUVs); - - MTL::Library *library = fMetalDevice->newDefaultLibrary(); - - MTL::FunctionConstantValues *functionContents = MTL::FunctionConstantValues::alloc()->init(); - functionContents->setConstantValue(&attributes.numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); - functionContents->setConstantValue(&attributes.numLayers, MTL::DataTypeUShort, FunctionConstantNumLayers); - MTL::Function *fragFunction; - MTL::Function *vertFunction; - - if(!attributes.vertexShaderID && !attributes.fragmentShaderID) { - if(attributes.forShadows == 1) { - fragFunction = library->newFunction( - NS::String::string("shadowFragmentShader", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - } else if(attributes.forShadows == 2) { - fragFunction = library->newFunction( - NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - } else { - fragFunction = library->newFunction( - NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - } - vertFunction = library->newFunction( - NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - } else if(attributes.vertexShaderID && attributes.fragmentShaderID) { - switch(attributes.vertexShaderID) { - case plShaderID::vs_WaveFixedFin7: - vertFunction = library->newFunction( - NS::String::string("vs_WaveFixedFin7", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::vs_CompCosines: - 
vertFunction = library->newFunction( - NS::String::string("vs_CompCosines", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::vs_BiasNormals: - vertFunction = library->newFunction( - NS::String::string("vs_BiasNormals", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::vs_GrassShader: - vertFunction = library->newFunction( - NS::String::string("vs_GrassShader", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::vs_WaveDecEnv_7: - vertFunction = library->newFunction( - NS::String::string("vs_WaveDecEnv_7", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - default: - hsAssert(0, "unknown shader requested"); + fCurrentCommandBuffer = fCommandQueue->commandBuffer(); + fCurrentCommandBuffer->retain(); + //cache the depth buffer, we'll just clear it every time. + if(fCurrentDrawableDepthTexture == nullptr || + drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || + drawable->texture()->height() != fCurrentDrawableDepthTexture->height() + ) { + if(fCurrentDrawableDepthTexture) { + fCurrentDrawableDepthTexture->release(); } - switch(attributes.fragmentShaderID) { - case plShaderID::ps_WaveFixed: - fragFunction = library->newFunction( - NS::String::string("ps_WaveFixed", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::ps_MoreCosines: - fragFunction = library->newFunction( - NS::String::string("ps_CompCosines", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::ps_BiasNormals: - fragFunction = library->newFunction( - NS::String::string("ps_BiasNormals", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::ps_GrassShader: - fragFunction = library->newFunction( - NS::String::string("ps_GrassShader", NS::ASCIIStringEncoding), - 
functionContents, - (NS::Error **)NULL - ); - break; - case plShaderID::ps_WaveDecEnv: - fragFunction = library->newFunction( - NS::String::string("ps_WaveDecEnv", NS::ASCIIStringEncoding), - functionContents, - (NS::Error **)NULL - ); - break; - default: - hsAssert(0, "unknown shader requested"); + MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + drawable->texture()->width(), + drawable->texture()->height(), + false); + if(fMetalDevice->hasUnifiedMemory()) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } - } else { - hsAssert(0, "Pipeline only supports both fragment and vertex shaders together"); + depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + + fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); } + fCurrentDrawable = drawable->retain(); +} + +void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable **condOut) { - MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); - - vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); - vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); - vertexDescriptor->attributes()->object(VertexAttributePosition)->setOffset(vertOffset); - - vertexDescriptor->attributes()->object(VertexAttributeNormal)->setFormat(MTL::VertexFormatFloat3); - vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); - vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); - - for(int i=0; iattributes()->object(VertexAttributeTexcoord+i)->setFormat(MTL::VertexFormatFloat3); - vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setBufferIndex(0); - 
vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setOffset(baseUvOffset + (i * sizeof(float) * 3)); + __block std::condition_variable *newCondition = new std::condition_variable(); + fConditionMap[record] = newCondition; + if(condOut) { + *condOut = newCondition; } - vertexDescriptor->attributes()->object(VertexAttributeColor)->setFormat(MTL::VertexFormatUChar4); - vertexDescriptor->attributes()->object(VertexAttributeColor)->setBufferIndex(0); - vertexDescriptor->attributes()->object(VertexAttributeColor)->setOffset(colorOffset); + if (fNewPipelineStateMap[record] != NULL) { + return fNewPipelineStateMap[record]; + } - vertexDescriptor->layouts()->object(VertexAttributePosition)->setStride(stride); + MTL::Library *library = fMetalDevice->newDefaultLibrary(); - MTL::RenderPipelineDescriptor *descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + std::shared_ptr pipelineState = record.state; - descriptor->setDepthAttachmentPixelFormat(attributes.depthFormat); + MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + descriptor->setLabel(pipelineState->GetDescription()); + const MTL::Function* vertexFunction = pipelineState->GetVertexFunction(library); + const MTL::Function* fragmentFunction = pipelineState->GetFragmentFunction(library); + descriptor->setVertexFunction(vertexFunction); + descriptor->setFragmentFunction(fragmentFunction); descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); - // No color, just writing out Z values. 
- if(attributes.forShadows == 1) { - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); - } else if(attributes.forShadows == 2) { - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); - } else if (attributes.blendFlags & hsGMatState::kBlendNoColor) { - //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); - } else { - switch (attributes.blendFlags & hsGMatState::kBlendMask) - { - // Detail is just a special case of alpha, handled in construction of the texture - // mip chain by making higher levels of the chain more transparent. 
- case hsGMatState::kBlendDetail: - case hsGMatState::kBlendAlpha: - if (attributes.blendFlags & hsGMatState::kBlendInvertFinalAlpha) { - if (attributes.blendFlags & hsGMatState::kBlendAlphaPremultiplied) { - //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); - } else { - //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); - } - } else { - if (attributes.blendFlags & hsGMatState::kBlendAlphaPremultiplied) { - //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - } else { - //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); - } - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - } - break; - - // Multiply the final color onto the frame buffer. 
- case hsGMatState::kBlendMult: - if (attributes.blendFlags & hsGMatState::kBlendInvertFinalColor) { - //printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceColor); - } else { - //printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorSourceColor); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceColor); - } - break; - - // Add final color to FB. - case hsGMatState::kBlendAdd: - //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); - break; - - // Multiply final color by FB color and add it into the FB. 
- case hsGMatState::kBlendMADD: - //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorDestinationColor); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); - break; - - // Final color times final alpha, added into the FB. - case hsGMatState::kBlendAddColorTimesAlpha: - if (attributes.blendFlags & hsGMatState::kBlendInvertFinalAlpha) { - //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); - } else { - //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); - } - break; - - // Overwrite final color onto FB - case 0: - //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); - descriptor->colorAttachments()->object(0)->setRgbBlendOperation(MTL::BlendOperationAdd); - //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - 
descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); - - /*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero);*/ - break; - - default: - { - /*hsAssert(false, "Too many blend modes specified in material"); - plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); - if( lay ) - { - if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha ) - { - lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha); - } - else - { - lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); - } - }*/ - } - break; - } - } - - descriptor->colorAttachments()->object(0)->setPixelFormat(attributes.outputFormat); + pipelineState->ConfigureBlend(descriptor->colorAttachments()->object(0)); - descriptor->setFragmentFunction(fragFunction); - descriptor->setVertexFunction(vertFunction); + MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); + pipelineState->ConfigureVertexDescriptor(vertexDescriptor); descriptor->setVertexDescriptor(vertexDescriptor); - std::string label = "Render Pipeline: " + std::to_string(attributes.numUVs) + "UVs, " + std::to_string(attributes.numWeights) + " skin weight"; - descriptor->setLabel(NS::String::string(label.c_str(), NS::UTF8StringEncoding)); - - functionContents->release(); - - __block std::condition_variable *newCondition = new std::condition_variable(); - 
fConditionMap[attributes] = newCondition; - if(condOut) { - *condOut = newCondition; - } + descriptor->setDepthAttachmentPixelFormat(record.depthFormat); + descriptor->colorAttachments()->object(0)->setPixelFormat(record.colorFormat); + NS::Error* error; fMetalDevice->newRenderPipelineState(descriptor, ^(MTL::RenderPipelineState *pipelineState, NS::Error *error){ - if(error) { - hsAssert(0, error->localizedDescription()->cString(NS::UTF8StringEncoding)); + if (error) { //leave the condition in place for now, we don't want to //retry if the shader is defective. the condition will //prevent retries + hsAssert(0, error->localizedDescription()->cString(NS::UTF8StringEncoding)); } else { - //update the pipeline state, if it's null just set null - pipelineState->retain(); - plMetalLinkedPipeline *linkedPipeline = new plMetalLinkedPipeline(); - linkedPipeline->pipelineState = pipelineState; - linkedPipeline->fragFunction = fragFunction; - linkedPipeline->vertexFunction = vertFunction; + linkedPipeline->pipelineState = pipelineState->retain(); + linkedPipeline->fragFunction = fragmentFunction; + linkedPipeline->vertexFunction = vertexFunction; - fPipelineStateMap[attributes] = linkedPipeline; + fNewPipelineStateMap[record] = linkedPipeline; + //signal that we're done + newCondition->notify_all(); } - //signal that we're done - newCondition->notify_all(); }); + descriptor->release(); library->release(); } -plMetalDevice::plMetalLinkedPipeline* plMetalDevice::pipelineStateFor(const plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, int forShadows) -{ - plPipelineStateAtrributes attributes = plPipelineStateAtrributes(vRef, blendFlags, fCurrentFragmentOutputTexture->pixelFormat(), fCurrentDepthFormat, vertexShaderID, fragmentShaderID, forShadows, numLayers); - plMetalLinkedPipeline* renderState = fPipelineStateMap[attributes]; +plMetalDevice::plMetalLinkedPipeline* 
plMetalDevice::PipelineState(plMetalPipelineState* pipelineState) { + + MTL::PixelFormat depthFormat = fCurrentDepthFormat; + MTL::PixelFormat colorFormat = fCurrentFragmentOutputTexture->pixelFormat(); + + plMetalPipelineRecord record = { + depthFormat, + colorFormat + }; + + record.state = std::shared_ptr(pipelineState->Clone()); + + plMetalLinkedPipeline* renderState = fNewPipelineStateMap[record]; //if it exists, return it, we're done if(renderState) { @@ -1110,58 +843,53 @@ plMetalDevice::plMetalLinkedPipeline* plMetalDevice::pipelineStateFor(const plMe //Note: even if it already exists, this lock will be kept, and it will //let us through. This is to prevent race conditions where the render state //was null, but maybe in the time it took us to get here the state compiled. - std::condition_variable *alreadyBuildingCondition = fConditionMap[attributes]; + std::condition_variable *alreadyBuildingCondition = fConditionMap[record]; if(alreadyBuildingCondition) { std::unique_lock lock(fPipelineCreationMtx); alreadyBuildingCondition->wait(lock); //should be returning the render state here, if not it failed to build //we'll allow the null return - return fPipelineStateMap[attributes]; + return fNewPipelineStateMap[record]; } //it doesn't exist, start a build and wait //only render thread is allowed to start builds, //shouldn't be race conditions here - StartRenderPipelineBuild(attributes, &alreadyBuildingCondition); + StartPipelineBuild(record, &alreadyBuildingCondition); std::unique_lock lock(fPipelineCreationMtx); alreadyBuildingCondition->wait(lock); //should be returning the render state here, if not it failed to build //we'll allow the null return - return fPipelineStateMap[attributes]; + return fNewPipelineStateMap[record]; } -void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) +std::condition_variable* plMetalDevice::PrewarmPipelineStateFor(plMetalPipelineState* pipelineState) { - printf("Creating new command bufffer\n"); - 
fCurrentCommandBuffer = fCommandQueue->commandBuffer(); - fCurrentCommandBuffer->retain(); + MTL::PixelFormat depthFormat = fCurrentDepthFormat; + MTL::PixelFormat colorFormat = fCurrentFragmentOutputTexture->pixelFormat(); - //cache the depth buffer, we'll just clear it every time. - if(fCurrentDrawableDepthTexture == nullptr || - drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || - drawable->texture()->height() != fCurrentDrawableDepthTexture->height() - ) { - if(fCurrentDrawableDepthTexture) { - fCurrentDrawableDepthTexture->release(); - } - - MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, - drawable->texture()->width(), - drawable->texture()->height(), - false); - if(fMetalDevice->hasUnifiedMemory()) { - depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); - } else { - depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); - } - depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); - - fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); - } + plMetalPipelineRecord record = { + depthFormat, + colorFormat + }; - fCurrentDrawable = drawable->retain(); + record.state = std::shared_ptr(pipelineState->Clone()); + //only render thread is allowed to prewarm, no race conditions around + //fConditionMap creation + if(!fNewPipelineStateMap[record] && fConditionMap[record]) { + std::condition_variable *condOut; + StartPipelineBuild(record, &condOut); + return condOut; + } + return nullptr; +} + +bool plMetalDevice::plMetalPipelineRecord::operator==(const plMetalPipelineRecord &p) const { + return depthFormat == p.depthFormat && + colorFormat == p.colorFormat && + state->operator==(*p.state); } MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() @@ -1171,7 +899,6 @@ MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() void plMetalDevice::SubmitCommandBuffer() { - printf("Submitting 
command bufffer\n"); fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; @@ -1192,6 +919,14 @@ void plMetalDevice::SubmitCommandBuffer() fClearDepth = 1.0; } +std::size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept +{ + std::size_t value = std::hash()(s.depthFormat); + value ^= std::hash()(s.colorFormat); + value ^= std::hash()(*s.state); + return value; +} + MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() { //return the current render command encoder @@ -1201,7 +936,6 @@ MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() } if (!fCurrentRenderTargetCommandEncoder) { - printf("Asked for command encoder, but one not present. Creating..."); BeginNewRenderPass(); fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 704a69dadc..a96f9ecfa7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -63,6 +63,7 @@ class plBitmap; class plMipmap; class plCubicEnvironmap; class plLayerInterface; +class plMetalPipelineState; matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder = true); @@ -72,6 +73,7 @@ class plMetalDevice friend plMetalPipeline; friend class plMetalMaterialShaderRef; friend class plMetalPlateManager; + friend class plMetalPipelineState; public: typedef plMetalVertexBufferRef VertexBufferRef; @@ -101,9 +103,9 @@ class plMetalDevice public: struct plMetalLinkedPipeline { - MTL::RenderPipelineState *pipelineState; - MTL::Function *fragFunction; - MTL::Function *vertexFunction; + const MTL::RenderPipelineState *pipelineState; + const MTL::Function *fragFunction; + const MTL::Function *vertexFunction; }; 
plMetalDevice(); @@ -137,8 +139,8 @@ class plMetalDevice void SetupTextureRef(plLayerInterface* layer, plBitmap* img, TextureRef* tRef); void CheckTexture(TextureRef* tRef); - void MakeTextureRef(TextureRef* tRef, plLayerInterface* layer, plMipmap* img); - void MakeCubicTextureRef(TextureRef* tRef, plLayerInterface* layer, plCubicEnvironmap* img); + void MakeTextureRef(TextureRef* tRef, plMipmap* img); + void MakeCubicTextureRef(TextureRef* tRef, plCubicEnvironmap* img); const char* GetErrorString() const { return fErrorMsg; } @@ -151,9 +153,6 @@ class plMetalDevice void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice); uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap); - std::condition_variable * prewarmPipelineStateFor(plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, bool forShadows = false); - ///Returns the proper pipeline state for the given vertex and fragment buffers, and the current drawable. These states should not be reused between drawables. - plMetalLinkedPipeline* pipelineStateFor(const plMetalVertexBufferRef * vRef, uint32_t blendFlags, uint32_t numLayers, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID, int forShadows = 0); //stencil states are expensive to make, they should be cached //FIXME: There should be a function to pair these with hsGMatState @@ -173,59 +172,43 @@ class plMetalDevice void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); private: - //internal struct for tracking which Metal state goes with which set of - //fragment/vertex pass attributes. This allows for shader program reuse. 
- //Hashable so we can use a std::unordered_map for storage - struct plPipelineStateAtrributes { - uint numUVs; - uint numLayers; - uint numWeights; - bool hasSkinIndices; - plShaderID::ID vertexShaderID; - plShaderID::ID fragmentShaderID; - //the specific blend mode flag, not the entire set of flags from a material - //these are defined as mutually exclusive anyway - //0 implies no blend flag set - uint32_t blendFlags; - MTL::PixelFormat outputFormat; + struct plMetalPipelineRecord { MTL::PixelFormat depthFormat; - int forShadows; - - bool operator==(const plPipelineStateAtrributes &p) const { - return numUVs == p.numUVs && numWeights == p.numWeights && blendFlags == p.blendFlags && hasSkinIndices == p.hasSkinIndices && outputFormat == p.outputFormat && vertexShaderID == p.vertexShaderID && fragmentShaderID == p.fragmentShaderID && depthFormat == p.depthFormat && forShadows == p.forShadows && numUVs == p.numUVs && numLayers == p.numLayers; - } - - plPipelineStateAtrributes(const plPipelineStateAtrributes &attributes) { - memcpy(this, &attributes, sizeof(plPipelineStateAtrributes)); - } + MTL::PixelFormat colorFormat; + std::shared_ptr state; - plPipelineStateAtrributes(const plMetalVertexBufferRef * vRef, const uint32_t blendFlags, const MTL::PixelFormat outputPixelFormat, const MTL::PixelFormat outputDepthFormat, const plShaderID::ID vertexShaderID, const plShaderID::ID fragmentShaderID, const int forShadows, const uint numLayers); + bool operator==(const plMetalPipelineRecord &p) const; }; - struct plPipelineStateAtrributesHashFunction + + struct plMetalPipelineRecordHashFunction { - std::size_t operator() (plPipelineStateAtrributes const & key) const - { - std::size_t h1 = std::hash()(key.numUVs); - std::size_t h2 = std::hash()(key.numWeights); - std::size_t h3 = std::hash()(key.blendFlags); - std::size_t h4 = std::hash()(key.hasSkinIndices); - std::size_t h5 = std::hash()(key.outputFormat); - std::size_t h6 = std::hash()(key.vertexShaderID); - std::size_t h7 = 
std::hash()(key.fragmentShaderID); - std::size_t h8 = std::hash()(key.depthFormat); - std::size_t h9 = std::hash()(key.forShadows); - std::size_t h10 = std::hash()(key.numLayers); - - return h1 ^ h2 ^ h3 ^ h4 ^ h5 ^ h6 ^ h7 ^ h8 ^ h9 ^ h10; - } + std::size_t operator()(plMetalPipelineRecord const& s) const noexcept; }; - std::unordered_map fPipelineStateMap; + std::unordered_map fNewPipelineStateMap; //the condition map allows consumers of pipeline states to wait until the pipeline state is ready - std::unordered_map fConditionMap; - void StartRenderPipelineBuild(plPipelineStateAtrributes &attributes, std::condition_variable **condOut); + std::unordered_map fConditionMap; std::mutex fPipelineCreationMtx; + void StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable **condOut); + std::condition_variable* PrewarmPipelineStateFor(plMetalPipelineState* pipelineState); + + struct plPipelineStateRecord { + MTL::PixelFormat outputFormat; + MTL::PixelFormat depthFormat; + plMetalPipelineState *state; + + bool operator==(const plPipelineStateRecord &p) const { + return (outputFormat == p.outputFormat && depthFormat == p.depthFormat && state == p.state); + } + + plPipelineStateRecord(const plPipelineStateRecord &attributes) { + memcpy(this, &attributes, sizeof(plPipelineStateRecord)); + } + }; + +protected: + plMetalLinkedPipeline* PipelineState(plMetalPipelineState* pipelineState); private: //these are internal bits for backing the current render pass diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 0e677e0b06..a58d1f0cfc 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -353,19 +353,6 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc void 
plMetalMaterialShaderRef::PopulateFragmentShaderLayerFromLayer(plFragmentShaderLayer *fragmentLayer, plLayerInterface* layer) { hsGMatState state = ICompositeLayerState(layer); plBitmap* texture = layer->GetTexture(); - if (texture != nullptr) { - plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); - if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { - fragmentLayer->passType = PassTypeCubicTexture; - } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { - fragmentLayer->passType = PassTypeTexture; - } - - } else { - fragmentLayer->passType = PassTypeColor; - } - - fragmentLayer->uvIndex = layer->GetUVWSrc(); switch (layer->GetClampFlags()) { case hsGMatState::kClampTextureU: @@ -381,9 +368,6 @@ void plMetalMaterialShaderRef::PopulateFragmentShaderLayerFromLayer(plFragmentSh fragmentLayer->sampleType = 0; break; } - - fragmentLayer->miscFlags = state.fMiscFlags; - fragmentLayer->blendMode = state.fBlendFlags; } uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) @@ -579,7 +563,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme if (state.fBlendFlags & hsGMatState::kBlendAlphaTestHigh) { uniforms->alphaThreshold = 64.f/255.f; } else { - uniforms->alphaThreshold = 0.00000000001f; + uniforms->alphaThreshold = 0.0001f; } } else { uniforms->alphaThreshold = 0.f; @@ -587,3 +571,59 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme return layer + currNumLayers; } + +void plMetalMaterialShaderRef::GetSourceArray(uint8_t *array, uint8_t pass) { + memset(array, 0, sizeof(uint8_t) * 8); + + uint16_t currNumLayers = fPassLengths[pass]; + uint16_t baseLayer = fPassIndices[pass]; + uint16_t i = 0; + for (i = 0; i < currNumLayers; i++) + { + plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); + plBitmap* texture = layPtr->GetTexture(); + if 
(texture != nullptr) { + plMetalTextureRef* texRef = (plMetalTextureRef*)texture->GetDeviceRef(); + if(!texRef->fTexture) + continue; + + plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { + array[i] = PassTypeCubicTexture; + } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { + array[i] = PassTypeTexture; + } else { + array[i] = PassTypeColor; + } + + } else { + array[i] = PassTypeColor; + } + } +} + +void plMetalMaterialShaderRef::GetBlendFlagArray(uint32_t *array, uint8_t pass) { + memset(array, 0, sizeof(uint8_t) * 8); + + uint16_t currNumLayers = fPassLengths[pass]; + uint16_t baseLayer = fPassIndices[pass]; + uint16_t i = 0; + for (i = 0; i < currNumLayers; i++) + { + plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); + array[i] = layPtr->GetBlendFlags(); + } +} + +void plMetalMaterialShaderRef::GetMiscFlagArray(uint32_t *array, uint8_t pass) { + memset(array, 0, sizeof(uint8_t) * 8); + + uint16_t currNumLayers = fPassLengths[pass]; + uint16_t baseLayer = fPassIndices[pass]; + uint16_t i = 0; + for (i = 0; i < currNumLayers; i++) + { + plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); + array[i] = layPtr->GetMiscFlags(); + } +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index f88e313d3f..3f31e13420 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -94,6 +94,9 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef static plLayerInterface* Passthrough(plLayerInterface* layer, uint32_t index) { return layer; } + void GetSourceArray(uint8_t *array, uint8_t pass); + void GetBlendFlagArray(uint32_t *array, uint8_t 
pass); + void GetMiscFlagArray(uint32_t *array, uint8_t pass); private: void ILoopOverLayers(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index ddad4b8a01..6f71599949 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -50,6 +50,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "plMetalPipeline.h" #include "plMetalMaterialShaderRef.h" #include "plMetalPlateManager.h" +#include "plMetalPipelineState.h" #include "hsTimer.h" #include "plPipeDebugFlags.h" @@ -148,7 +149,6 @@ bool plRenderTriListFunc::RenderPrims() const fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState); - fDevice->CurrentRenderCommandEncoder()->setFragmentBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState); fDevice->CurrentRenderCommandEncoder()->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, fNumTris, MTL::IndexTypeUInt16, fDevice->fCurrentIndexBuffer, (sizeof(uint16_t) * fIStart)); } @@ -269,7 +269,9 @@ bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector & return true; } -plTextFont *plMetalPipeline::MakeTextFont(char *face, uint16_t size) { return nullptr; } +plTextFont *plMetalPipeline::MakeTextFont(char *face, uint16_t size) { + return nullptr; +} bool plMetalPipeline::OpenAccess(plAccessSpan &dst, plDrawableSpans *d, const plVertexSpan *span, bool readOnly) { return false; } @@ -1225,124 +1227,9 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) size_t pass; for (pass = 0; pass < mRef->GetNumPasses(); pass++) { IHandleMaterial(material, pass, &span, vRef); -#if 0 - plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); - fCurrLayerIdx = mRef->GetPassIndex(pass); - - ICalcLighting(mRef, 
lay, &span); - - hsGMatState s; - s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); - - /* - If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer. - This can happen with some models like the fire marbles in the neighborhood that have some models - for physics only, and then can block other rendering in the Z buffer. - DX pipeline does this in ILoopOverLayers. - */ - if( (s.fBlendFlags & hsGMatState::kBlendAlpha) - &&lay->GetOpacity() <= 0 - &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { - continue; - } - - IHandleZMode(s); - IHandleBlendMode(s); - - if (s.fMiscFlags & hsGMatState::kMiscTwoSided) { - if(fCurrentCullMode != MTL::CullModeNone) { - fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); - fCurrentCullMode = MTL::CullModeNone; - } - } else { - ISetCullMode(); - } - - if(lay->GetVertexShader()) { - //pure shader path - plShader *vertexShader = lay->GetVertexShader(); - plShader *fragShader = lay->GetPixelShader(); - - fCurrLay = lay; - fCurrNumLayers = mRef->fPassLengths[pass]; - - ISetShaders(vRef, s, vertexShader, fragShader); - - //FIXME: Programmable pipeline does not implement the full feature set - /* - The programmable pipeline doesn't do things like set the texture transform matrices, - In practice, the transforms aren't set and used. Does it matter that the Metal - implementation doesn't implemention the full inputs the DX version gets? - - If it is implemented, the same checks the DX version does should be also implemented. - DX will set texture transforms, but then turn them off in the pipeline and manually - manipulate texture co-ords in the shader. - - Texture setting should also _maybe_ be reconciled with the "fixed" pipeline. But - the fixed pipeline uses indirect textures mapped to a buffer. That approach could - work for the programmable pipeline too, but I'm planning changes to the fixed pipeline - and the way it stores textures. 
So maybe things should be reconciled after that - work is done. - */ - - for (size_t i = mRef->GetPassIndex(pass); i < mRef->GetPassIndex(pass) + mRef->fPassLengths[pass]; i++) { - plLayerInterface* layer = material->GetLayer(i); - if (!layer) { - continue; - } - - CheckTextureRef(layer); - - plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); - - if (!img) { - continue; - } - - plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); - - if (!texRef->fTexture) { - continue; - } - fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i - mRef->GetPassIndex(pass)); - } - } else { - //"Fixed" path - - - s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); - - if (s.fBlendFlags & hsGMatState::kBlendInvertVtxAlpha) - fCurrentRenderPassUniforms->invVtxAlpha = true; - else - fCurrentRenderPassUniforms->invVtxAlpha = false; - - std::vector& spanLights = span.GetLightList(false); - - int numActivePiggyBacks = 0; - //FIXME: In the DX source, this check was done on the first layer. Does that mean the first layer of the material or the first layer of the pass? 
- if( !(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive) ) - { - /// Tack lightmap onto last stage if we have one - numActivePiggyBacks = fActivePiggyBacks; - //if( numActivePiggyBacks > fMaxLayersAtOnce - fCurrNumLayers ) - // numActivePiggyBacks = fMaxLayersAtOnce - fCurrNumLayers; - - } - - plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered); - MTL::RenderPipelineState *pipelineState = pipeline->pipelineState; - if(fCurrentPipelineState != pipelineState) { - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); - fCurrentPipelineState = pipelineState; - } - - mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, plMetalMaterialShaderRef::Passthrough, plMetalMaterialShaderRef::Passthrough); - } -#endif if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) { - fCurrentRenderPassUniforms->ambientCol = simd_float4(1.0f); + fCurrentRenderPassUniforms->ambientCol = {1.0f, 1.0f, 1.0f, 1.0f}; fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->ambientSrc = 1.0; @@ -1513,16 +1400,21 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons } - plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered); - MTL::RenderPipelineState *pipelineState = pipeline->pipelineState; - if(fCurrentPipelineState != pipelineState) { - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); - fCurrentPipelineState = pipelineState; - } + uint8_t sources[8]; + uint32_t blendModes[8]; + uint32_t miscFlags[8]; + memset(sources, 0, sizeof(sources)); + memset(blendModes, 0, sizeof(blendModes)); + memset(miscFlags, 0, 
sizeof(miscFlags)); + lay = IPopOverBaseLayer(lay); if(numActivePiggyBacks==0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) { mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass); + + mRef->GetSourceArray(sources, pass); + mRef->GetBlendFlagArray(blendModes, pass); + mRef->GetMiscFlagArray(miscFlags, pass); } else { mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, @@ -1531,6 +1423,22 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons layer = IPushOverBaseLayer(layer); } layer = IPushOverAllLayer(layer); + + plBitmap* texture = layer->GetTexture(); + if (texture != nullptr) { + plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { + sources[index] = PassTypeCubicTexture; + } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { + sources[index] = PassTypeTexture; + } + + } else { + sources[index] = PassTypeColor; + } + blendModes[index] = layer->GetBlendFlags(); + miscFlags[index] = layer->GetMiscFlags(); + return layer; }, [&](plLayerInterface* layer, uint32_t index){ @@ -1540,6 +1448,22 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons return layer; }); } + + struct plMetalMaterialPassDescription passDescription; + memcpy(passDescription.passTypes, sources, sizeof(sources)); + memcpy(passDescription.blendModes, blendModes, sizeof(blendModes)); + memcpy(passDescription.miscFlags, miscFlags, sizeof(miscFlags)); + passDescription.numLayers = numActivePiggyBacks + mRef->fPassLengths[pass]; + + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); + const MTL::RenderPipelineState *pipelineState = 
linkedPipeline->pipelineState; + + /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); + const MTL::RenderPipelineState *pipelineState = pipeline->pipelineState;*/ + if(fCurrentPipelineState != pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); + fCurrentPipelineState = pipelineState; + } } return true; @@ -1778,7 +1702,7 @@ bool plMetalPipeline::ISetShaders(const plMetalVertexBufferRef * vRef, const hsG plShaderID::ID vertexShaderID = vShader->GetDecl()->GetID(); plShaderID::ID fragmentShaderID = pShader->GetDecl()->GetID(); - plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, blendMode.fBlendFlags, 0, vertexShaderID, fragmentShaderID); + plMetalDevice::plMetalLinkedPipeline *pipeline = plMetalDynamicMaterialPipelineState(&fDevice, vRef, blendMode.fBlendFlags, vertexShaderID, fragmentShaderID).GetRenderPipelineState(); if(fCurrentPipelineState != pipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipeline->pipelineState); fCurrentPipelineState = pipeline->pipelineState; @@ -2092,20 +2016,20 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 1.0 }; } else { hsColorRGBA amb = currLayer->GetPreshadeColor(); - fCurrentRenderPassUniforms->globalAmb = { amb.r, amb.g, amb.b, 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { amb.r, amb.g, amb.b, 1.0 }; + fCurrentRenderPassUniforms->globalAmb = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0 }; } hsColorRGBA dif = currLayer->GetRuntimeColor(); - fCurrentRenderPassUniforms->diffuseCol = { dif.r, dif.g, dif.b, 
currLayer->GetOpacity() }; + fCurrentRenderPassUniforms->diffuseCol = { static_cast(dif.r), static_cast(dif.g), static_cast(dif.b), static_cast(currLayer->GetOpacity()) }; hsColorRGBA em = currLayer->GetAmbientColor(); - fCurrentRenderPassUniforms->emissiveCol = { em.r, em.g, em.b, 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b), 1.0 }; // Set specular properties if (state.fShadeFlags & hsGMatState::kShadeSpecular) { hsColorRGBA spec = currLayer->GetSpecularColor(); - fCurrentRenderPassUniforms->specularCol = { spec.r, spec.g, spec.b, 1.0 }; + fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), 1.0 }; #if 0 mat.Power = currLayer->GetSpecularPower(); #endif @@ -2155,12 +2079,12 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; hsColorRGBA em = currLayer->GetAmbientColor(); - fCurrentRenderPassUniforms->emissiveCol = { em.r, em.g, em.b, 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b), 1.0 }; // Set specular properties if (state.fShadeFlags & hsGMatState::kShadeSpecular) { hsColorRGBA spec = currLayer->GetSpecularColor(); - fCurrentRenderPassUniforms->specularCol = { spec.r, spec.g, spec.b, 1.0 }; + fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), 1.0 }; #if 0 mat.Power = currLayer->GetSpecularPower(); #endif @@ -2169,7 +2093,7 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye } hsColorRGBA amb = currLayer->GetPreshadeColor(); - fCurrentRenderPassUniforms->globalAmb = { amb.r, amb.g, amb.b, amb.a }; + fCurrentRenderPassUniforms->globalAmb = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; fCurrentRenderPassUniforms->ambientSrc = 0.0; fCurrentRenderPassUniforms->diffuseSrc 
= 0.0; @@ -2200,7 +2124,7 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye fCurrentRenderPassUniforms->fogExponential = 0; fCurrentRenderPassUniforms->fogValues = {start, end}; - fCurrentRenderPassUniforms->fogColor = { color.r, color.g, color.b }; + fCurrentRenderPassUniforms->fogColor = { static_cast(color.r), static_cast(color.g), static_cast(color.b) }; break; } case plFogEnvironment::kExpFog: @@ -2212,7 +2136,7 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye fCurrentRenderPassUniforms->fogExponential = 1; fCurrentRenderPassUniforms->fogValues = { power, density}; - fCurrentRenderPassUniforms->fogColor = { color.r, color.g, color.b }; + fCurrentRenderPassUniforms->fogColor = { static_cast(color.r), static_cast(color.g), static_cast(color.b) }; break; } default: @@ -2301,13 +2225,13 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef void plMetalPipeline::IEnableLight(plMetalMaterialShaderRef* mRef, size_t i, plLightInfo* light) { hsColorRGBA amb = light->GetAmbient(); - fCurrentRenderPassUniforms->lampSources[i].ambient = { amb.r, amb.g, amb.b, amb.a }; + fCurrentRenderPassUniforms->lampSources[i].ambient = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; hsColorRGBA diff = light->GetDiffuse(); - fCurrentRenderPassUniforms->lampSources[i].diffuse = { diff.r, diff.g, diff.b, diff.a }; + fCurrentRenderPassUniforms->lampSources[i].diffuse = { static_cast(diff.r), static_cast(diff.g), static_cast(diff.b), static_cast(diff.a) }; hsColorRGBA spec = light->GetSpecular(); - fCurrentRenderPassUniforms->lampSources[i].specular = { spec.r, spec.g, spec.b, spec.a }; + fCurrentRenderPassUniforms->lampSources[i].specular = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), static_cast(spec.a) }; plDirectionalLightInfo* dirLight = nullptr; plOmniLightInfo* omniLight = nullptr; @@ -2414,10 +2338,6 @@ void 
plMetalPipeline::IDrawPlate(plPlate* plate) plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; - MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); - renderPassDescriptor->colorAttachments()->object(0)->setTexture(fDevice.GetCurrentDrawable()->texture()); - renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState); fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); @@ -3636,7 +3556,7 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp } /// Switch to the vertex buffer we want - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = fDevice.pipelineStateFor(vRef, hsGMatState::kBlendAlpha, fCurrentRenderPassUniforms->numUVSrcs, plShaderID::ID(0), plShaderID::ID(0), true); + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowCasterPipelineState(&fDevice, vRef).GetRenderPipelineState(); fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); @@ -3672,8 +3592,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con bool first = true; - int i; - for( i = 0; i < fShadows.size(); i++ ) + for(size_t i = 0; i < fShadows.size(); i++ ) { if( slaveBits.IsBitSet(fShadows[i]->fIndex) ) { @@ -3698,7 +3617,18 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con // in projecting the shadow map onto the scene. 
ISetupShadowLight(fShadows[i]); - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = fDevice.pipelineStateFor(vRef, hsGMatState::kBlendAlpha, fCurrentRenderPassUniforms->numUVSrcs, plShaderID::ID(0), plShaderID::ID(0), 2); + struct plMetalMaterialPassDescription passDescription; + memset(&passDescription.miscFlags, 0, sizeof(passDescription.miscFlags)); + memset(&passDescription.blendModes, 0, sizeof(passDescription.blendModes)); + memset(&passDescription.passTypes, 0, sizeof(passDescription.passTypes)); + passDescription.Populate(mat->GetLayer(0), 2); + passDescription.numLayers = 3; + if (mat->GetNumLayers()>1) { + passDescription.Populate(mat->GetLayer(1), 2); + passDescription.numLayers = 3; + } + + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); if(fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fCurrentPipelineState = linkedPipeline->pipelineState; @@ -4210,13 +4140,13 @@ void plMetalPipeline::CheckTextureRef(plLayerInterface* layer) if (tRef->IsDirty()) { plMipmap* mip = plMipmap::ConvertNoRef(bitmap); if (mip) { - fDevice.MakeTextureRef(tRef, layer, mip); + fDevice.MakeTextureRef(tRef, mip); return; } plCubicEnvironmap* cubic = plCubicEnvironmap::ConvertNoRef(bitmap); if (cubic) { - fDevice.MakeCubicTextureRef(tRef, layer, cubic); + fDevice.MakeCubicTextureRef(tRef, cubic); return; } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index faad2c1ce5..24c114c2a3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -149,7 +149,7 @@ class plMetalPipeline : public pl3DPipeline VertexUniforms* fCurrentRenderPassUniforms; //cache to prevent oversetting, Metal won't catch this for us and 
will encode extra work - MTL::RenderPipelineState* fCurrentPipelineState; + const MTL::RenderPipelineState* fCurrentPipelineState; MTL::DepthStencilState* fCurrentDepthStencilState; void FindFragFunction(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp new file mode 100644 index 0000000000..7f3b78fc3f --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -0,0 +1,426 @@ +// +// plMetalPipelineState.cpp +// plPipeline +// +// Created by Colin Cornaby on 3/10/22. +// + +#include "plMetalPipelineState.h" +#include "plDrawable/plGBufferGroup.h" +#include "plSurface/plLayerInterface.h" +#include "plSurface/hsGMaterial.h" +#include "plMetalDevice.h" +#include "plGImage/plMipmap.h" +#include "plGImage/plCubicEnvironmap.h" +#include "plPipeline/plCubicRenderTarget.h" +#include "plPipeline/plRenderTarget.h" +#include "plMetalDevice.h" + +plMetalPipelineState::plMetalPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) +: fDevice(device) +{ + fNumUVs = plGBufferGroup::CalcNumUVs(vRef->fFormat); + fNumWeights = (vRef->fFormat & plGBufferGroup::kSkinWeightMask) >> 4; + fHasSkinIndices = (vRef->fFormat & plGBufferGroup::kSkinIndices); +} + +void plMetalPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const +{ + constants->setConstantValue(&fNumUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); +} + +size_t plMetalPipelineState::GetHash() const { + std::size_t h1 = std::hash()(fNumUVs); + std::size_t h2 = std::hash()(fNumWeights); + std::size_t h3 = std::hash()(fHasSkinIndices); + std::size_t h4 = std::hash()(GetID()); + + return h1 ^ h2 ^ h3 ^ h4; +} + +plMetalDevice::plMetalLinkedPipeline* plMetalPipelineState::GetRenderPipelineState() { + return fDevice->PipelineState(this); +} + +void plMetalPipelineState::PrewarmRenderPipelineState() { + fDevice->PrewarmPipelineStateFor(this); +} + + 
+plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalMaterialPassDescription &description) +: plMetalPipelineState(device, vRef) { + fPassDescription = description; +} + +void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const +{ + plMetalPipelineState::GetFunctionConstants(constants); + constants->setConstantValue(&fPassDescription.numLayers, MTL::DataTypeUChar, FunctionConstantNumLayers); + constants->setConstantValues(&fPassDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); + constants->setConstantValues(&fPassDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); + constants->setConstantValues(&fPassDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); +} + +size_t plMetalMaterialPassPipelineState::GetHash() const { + std::size_t value = plMetalPipelineState::GetHash(); + value ^= fPassDescription.GetHash(); + + return value; +} + +void plMetalPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { + int vertOffset = 0; + int skinWeightOffset = vertOffset + (sizeof(float) * 3); + if(this->fHasSkinIndices) { + skinWeightOffset += sizeof(uint32_t); + } + int normOffset = skinWeightOffset + (sizeof(float) * this->fNumWeights); + int colorOffset = normOffset + (sizeof(float) * 3); + int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); + int stride = baseUvOffset + (sizeof(float) * 3 * this->fNumUVs); + + vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributePosition)->setOffset(vertOffset); + + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setFormat(MTL::VertexFormatFloat3); + 
vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); + + for(int i=0; ifNumUVs; i++) { + vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setOffset(baseUvOffset + (i * sizeof(float) * 3)); + } + + vertexDescriptor->attributes()->object(VertexAttributeColor)->setFormat(MTL::VertexFormatUChar4); + vertexDescriptor->attributes()->object(VertexAttributeColor)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeColor)->setOffset(colorOffset); + + vertexDescriptor->layouts()->object(VertexAttributePosition)->setStride(stride); +} + +void plMetalPipelineState::ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +{ + if (blendMode & hsGMatState::kBlendNoColor) { + //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + return; + } + switch (blendMode & hsGMatState::kBlendMask) + { + // Detail is just a special case of alpha, handled in construction of the texture + // mip chain by making higher levels of the chain more transparent. 
+ case hsGMatState::kBlendDetail: + case hsGMatState::kBlendAlpha: + if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { + if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { + //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + } else { + //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + } + } else { + if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { + //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + } else { + //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + } + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + } + break; + + // Multiply the final color onto the frame buffer. 
+ case hsGMatState::kBlendMult: + if (blendMode & hsGMatState::kBlendInvertFinalColor) { + //printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceColor); + } else { + //printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceColor); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceColor); + } + break; + + // Add final color to FB. + case hsGMatState::kBlendAdd: + //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + break; + + // Multiply final color by FB color and add it into the FB. + case hsGMatState::kBlendMADD: + //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorDestinationColor); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + break; + + // Final color times final alpha, added into the FB. 
+ case hsGMatState::kBlendAddColorTimesAlpha: + if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { + //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } else { + //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + } + break; + + // Overwrite final color onto FB + case 0: + //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); + //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); + + /*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero);*/ + break; + + default: + { + /*hsAssert(false, "Too many blend modes specified in material"); + plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); + if( lay ) + { + if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha ) + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | 
hsGMatState::kBlendAlpha); + } + else + { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); + } + }*/ + } + break; + } +} + +MTL::Function* plMetalMaterialPassPipelineState::GetVertexFunction(MTL::Library* library) { + NS::Error* error = nullptr; + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); + this->GetFunctionConstants(constants); + MTL::Function* function = library->newFunction( + NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + &error + )->autorelease(); + return function; +} + +MTL::Function* plMetalMaterialPassPipelineState::GetFragmentFunction(MTL::Library* library) { + return library->newFunction( + NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error **)NULL + )->autorelease(); +} + +plMetalMaterialPassPipelineState::~plMetalMaterialPassPipelineState() { +} + +const NS::String* plMetalMaterialPassPipelineState::GetDescription() { + return NS::MakeConstantString("Material Pipeline"); +} + +void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { + uint32_t blendMode = fPassDescription.blendModes[0]; + ConfigureBlendMode(blendMode, descriptor); +} + +void plMetalMaterialPassDescription::Populate(plLayerInterface* layPtr, uint8_t index) { + if (layPtr == nullptr) { + blendModes[index] = 0; + miscFlags[index] = 0; + passTypes[index] = 0; + } + + blendModes[index] = layPtr->GetBlendFlags(); + miscFlags[index] = layPtr->GetMiscFlags(); + + plBitmap* texture = layPtr->GetTexture(); + if (texture != nullptr) { + plMetalTextureRef* texRef = (plMetalTextureRef*)texture->GetDeviceRef(); + if(texRef->fTexture) { + + plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != 
nullptr) { + passTypes[index] = PassTypeCubicTexture; + } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { + passTypes[index] = PassTypeTexture; + } else { + passTypes[index] = PassTypeColor; + } + } + + } else { + passTypes[index] = PassTypeColor; + } + +} + +bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { + return static_cast(&p)->fPassDescription == this->fPassDescription; +} + + +MTL::Function* plMetalRenderShadowPipelineState::GetFragmentFunction(MTL::Library* library) { + return library->newFunction( + NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error **)NULL + )->autorelease(); +} + +void plMetalRenderShadowPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); +} + +const MTL::Function* plMetalRenderShadowCasterPipelineState::GetVertexFunction(MTL::Library* library) +{ + NS::Error* error = nullptr; + MTL::Function* function = library->newFunction( + NS::String::string("shadowVertexShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + &error + )->autorelease(); + return function; +} + +const MTL::Function* plMetalRenderShadowCasterPipelineState::GetFragmentFunction(MTL::Library* library) +{ + NS::Error* error = nullptr; + MTL::Function* function = library->newFunction( + NS::String::string("shadowFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + &error + )->autorelease(); + return function; +} + +const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL::Library *library) { + MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); 
+ MTL::Function* vertFunction; + switch(fVertexShaderID) { + case plShaderID::vs_WaveFixedFin7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveFixedFin7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::vs_CompCosines: + vertFunction = library->newFunction( + NS::String::string("vs_CompCosines", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::vs_BiasNormals: + vertFunction = library->newFunction( + NS::String::string("vs_BiasNormals", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::vs_GrassShader: + vertFunction = library->newFunction( + NS::String::string("vs_GrassShader", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::vs_WaveDecEnv_7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveDecEnv_7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + default: + hsAssert(0, "unknown shader requested"); + } + return vertFunction; +} + +const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MTL::Library *library) { + MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); + MTL::Function* fragFunction; + switch(fFragmentShaderID) { + case plShaderID::ps_WaveFixed: + fragFunction = library->newFunction( + NS::String::string("ps_WaveFixed", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::ps_MoreCosines: + fragFunction = library->newFunction( + NS::String::string("ps_CompCosines", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::ps_BiasNormals: + fragFunction = library->newFunction( + NS::String::string("ps_BiasNormals", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case 
plShaderID::ps_GrassShader: + fragFunction = library->newFunction( + NS::String::string("ps_GrassShader", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + case plShaderID::ps_WaveDecEnv: + fragFunction = library->newFunction( + NS::String::string("ps_WaveDecEnv", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; + default: + hsAssert(0, "unknown shader requested"); + } + return fragFunction; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h new file mode 100644 index 0000000000..d3edd90feb --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -0,0 +1,218 @@ +// +// plMetalPipelineState.hpp +// plPipeline +// +// Created by Colin Cornaby on 3/10/22. +// + +#ifndef plMetalPipelineState_hpp +#define plMetalPipelineState_hpp + +#include +#include + +#include "plMetalDevice.h" +#include "plMetalMaterialShaderRef.h" +#include "plSurface/plShaderTable.h" + +class plMetalPipelineState { +public: + plMetalPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef); + plMetalDevice::plMetalLinkedPipeline* GetRenderPipelineState(); + void PrewarmRenderPipelineState(); + bool operator==(const plMetalPipelineState& p) const { + if ((&p)->GetID() != this->GetID()) { + return false; + } else { + return p.fNumUVs == fNumUVs && p.fNumWeights == fNumWeights && p.fHasSkinIndices == fHasSkinIndices && IsEqual(p); + } + } + virtual size_t GetHash() const; + virtual bool IsEqual(const plMetalPipelineState &p) const = 0; + virtual uint16_t GetID() const { return 0; }; + virtual plMetalPipelineState* Clone() = 0; + + // + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) = 0; + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) = 0; + virtual const NS::String* GetDescription() = 0; + + virtual void 
ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; + void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor); + + void ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor *descriptor); +protected: + plMetalDevice* fDevice; + uint8_t fNumUVs; + uint8_t fNumWeights; + bool fHasSkinIndices; + virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const; + MTL::FunctionConstantValues* MakeFunctionConstants() { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); + this->GetFunctionConstants(constants); + return constants; + } +}; + +struct plMetalMaterialPassDescription { + uint8_t passTypes[8]; + uint32_t blendModes[8]; + uint32_t miscFlags[8]; + uint8_t numLayers; + + bool operator==(const plMetalMaterialPassDescription &p) const { + bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0; + return match; + } + + size_t GetHash() const { + std::size_t value = std::hash()(numLayers); + + for(int i=0;i<8;i++){ + value ^= std::hash()( blendModes[i] ); + } + + for(int i=0;i<8;i++){ + value ^= std::hash()( miscFlags[i] ); + } + + for(int i=0;i<8;i++){ + value ^= std::hash()( passTypes[i] ); + } + + return value; + } + + void Populate(plLayerInterface* layPtr, uint8_t index); +}; + +template<> +struct std::hash +{ + std::size_t operator()(plMetalMaterialPassDescription const& s) const noexcept + { + return s.GetHash(); + } +}; + +class plMetalMaterialPassPipelineState: public plMetalPipelineState { +public: + plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, const plMetalMaterialPassDescription &description); + virtual size_t GetHash() const override; + MTL::Function* GetVertexFunction(MTL::Library* library) override; + MTL::Function* 
GetFragmentFunction(MTL::Library* library) override; + + virtual const NS::String* GetDescription() override; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; + + virtual bool IsEqual(const plMetalPipelineState &p) const override; + + virtual uint16_t GetID() const override { return 1; }; + + virtual plMetalPipelineState* Clone() override { + return new plMetalMaterialPassPipelineState(*this); + } + ~plMetalMaterialPassPipelineState(); + virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const override; +protected: + plMetalMaterialPassDescription fPassDescription; +}; + +class plMetalRenderShadowCasterPipelineState: public plMetalPipelineState { +public: + plMetalRenderShadowCasterPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) + : plMetalPipelineState(device, vRef) { + + } + const MTL::Function* GetVertexFunction(MTL::Library* library) override; + const MTL::Function* GetFragmentFunction(MTL::Library* library) override; + + const NS::String* GetDescription() override { + return NS::MakeConstantString("Shadow Caster Pipeline"); + }; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); + }; + virtual uint16_t GetID() const override { return 2; }; + + bool IsEqual(const plMetalPipelineState &p) const override { + //nothing to add at this level + return true; + } + + + virtual plMetalPipelineState* Clone() override { + return new plMetalRenderShadowCasterPipelineState(*this); + } + +}; + +class plMetalRenderShadowPipelineState: public plMetalMaterialPassPipelineState { +public: + plMetalRenderShadowPipelineState(plMetalDevice* device, plMetalVertexBufferRef *vRef, const plMetalMaterialPassDescription &description) + : plMetalMaterialPassPipelineState(device, vRef, description) { + } + + const NS::String* 
GetDescription() override { + return NS::MakeConstantString("Shadow Span Render Pipeline"); + }; + MTL::Function* GetFragmentFunction(MTL::Library* library) override; + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; + virtual uint16_t GetID() const override { return 3; } ; + + virtual plMetalPipelineState* Clone() override { + return new plMetalRenderShadowPipelineState(*this); + } +}; + +class plMetalDynamicMaterialPipelineState: public plMetalPipelineState { +public: + plMetalDynamicMaterialPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, uint32_t blendMode, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID) + : plMetalPipelineState(device, vRef), + fVertexShaderID(vertexShaderID), + fFragmentShaderID(fragmentShaderID), + fBlendMode(blendMode) { + + }; + + virtual plMetalPipelineState* Clone() override { + return new plMetalDynamicMaterialPipelineState(*this); + } + + bool IsEqual(const plMetalPipelineState &p) const override { + const plMetalDynamicMaterialPipelineState* dynamicState = static_cast(&p); + if (!dynamicState) { + return false; + } + return dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID; + } + + const MTL::Function* GetVertexFunction(MTL::Library *library) override; + const MTL::Function* GetFragmentFunction(MTL::Library *library) override; + + const NS::String *GetDescription() override { + return NS::MakeConstantString("Dynamic Shader"); + } + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override { + ConfigureBlendMode(fBlendMode, descriptor); + } +protected: + plShaderID::ID fVertexShaderID; + plShaderID::ID fFragmentShaderID; + uint32_t fBlendMode; +}; + +template<> +struct std::hash +{ + std::size_t operator()(plMetalPipelineState const& s) const noexcept + { + return s.GetHash(); + } +}; + +#endif /* plMetalPipelineState_hpp */ diff --git 
a/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp b/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp index 28dfdfb6f6..bf806ad867 100644 --- a/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp +++ b/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp @@ -463,6 +463,9 @@ plMipmap *plMipmap::IReadRLEImage( hsStream *stream ) bool done = false; plMipmap *retVal = new plMipmap(fWidth,fHeight,plMipmap::kARGB32Config,1); + if(retVal->GetKeyName() == "RightDTMap2_dynText") { + printf("hi"); + } uint32_t *curPos = (uint32_t*)retVal->fImage; uint32_t curLoc = 0; From 1b1c2d583678fbdd7b4b7fa59a20be259963cd78 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 19 Mar 2022 13:17:37 -0700 Subject: [PATCH 010/165] Fixing crash on Intel graphics Intel graphics have unified memory, but don't support memoryless textures. Now explicitly checking for the Apple GPU family. --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 2 +- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 7753787d56..de51ef4513 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -752,7 +752,7 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) drawable->texture()->width(), drawable->texture()->height(), false); - if(fMetalDevice->hasUnifiedMemory()) { + if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 6f71599949..98cfd6984e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp 
+++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -399,7 +399,7 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) owner->GetWidth(), owner->GetHeight(), false); - if(fDevice.fMetalDevice->hasUnifiedMemory()) { + if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); @@ -3379,7 +3379,8 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe owner->GetWidth(), owner->GetHeight(), false); - if(fDevice.fMetalDevice->hasUnifiedMemory()) { + + if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); From b1fbfa089242ea845b215c0508a44e0fcdddc7af Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 20 Mar 2022 18:58:34 -0700 Subject: [PATCH 011/165] Initial version of avatar texture rendering --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + .../pfMetalPipeline/ShaderSrc/Avatar.metal | 38 +++ .../pfMetalPipeline/plMetalDevice.cpp | 2 +- .../pfMetalPipeline/plMetalDevice.h | 2 +- .../plMetalMaterialShaderRef.cpp | 12 +- .../pfMetalPipeline/plMetalPipeline.cpp | 234 ++++++++++++++---- .../pfMetalPipeline/plMetalPipeline.h | 7 +- 7 files changed, 242 insertions(+), 54 deletions(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 8ddb5b0481..24f1e39c30 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -126,6 +126,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal 
../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal new file mode 100644 index 0000000000..3d4a7829c8 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal @@ -0,0 +1,38 @@ +// +// Avatar.metal +// plClient +// +// Created by Colin Cornaby on 3/2/22. +// + +#include +using namespace metal; + + +typedef struct { + float4 position [[position]]; + float2 uvPosition; +} PreprocessAvatarTexturesInOut; + +typedef struct +{ + float2 position [[attribute(0)]]; + float2 uvPostion [[attribute(1)]]; +} PreprocessAvatarVertex; + +vertex PreprocessAvatarTexturesInOut PreprocessAvatarVertexShader(PreprocessAvatarVertex in [[stage_in]]) { + return { float4(in.position.x, in.position.y, 0.0, 1.0 ), in.uvPostion }; +} + +fragment half4 PreprocessAvatarFragmentShader(PreprocessAvatarTexturesInOut in [[stage_in]], + texture2d layer [[ texture(0) ]]) +{ + constexpr sampler colorSampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_zero); + + half4 colorSample = layer.sample(colorSampler, in.uvPosition.xy); + + return colorSample; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index de51ef4513..6a4e7e3973 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -525,7 +525,7 @@ void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef *iRef, plGB iRef->SetDirty(false); } -void plMetalDevice::SetupTextureRef(plLayerInterface *layer, plBitmap *img, plMetalDevice::TextureRef *tRef) +void 
plMetalDevice::SetupTextureRef(plBitmap *img, plMetalDevice::TextureRef *tRef) { tRef->fOwner = img; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index a96f9ecfa7..9b09926197 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -137,7 +137,7 @@ class plMetalDevice void CheckIndexBuffer(IndexBufferRef* iRef); void FillIndexBufferRef(IndexBufferRef* iRef, plGBufferGroup* owner, uint32_t idx); - void SetupTextureRef(plLayerInterface* layer, plBitmap* img, TextureRef* tRef); + void SetupTextureRef(plBitmap* img, TextureRef* tRef); void CheckTexture(TextureRef* tRef); void MakeTextureRef(TextureRef* tRef, plMipmap* img); void MakeCubicTextureRef(TextureRef* tRef, plCubicEnvironmap* img); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index a58d1f0cfc..8f49ef542d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -105,7 +105,7 @@ void plMetalMaterialShaderRef::CheckMateralRef() continue; } - fPipeline->CheckTextureRef(layer); + fPipeline->CheckLayerTextureRef(layer); } } } @@ -121,7 +121,7 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en continue; } - fPipeline->CheckTextureRef(layer); + fPipeline->CheckLayerTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -175,7 +175,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode continue; } - fPipeline->CheckTextureRef(layer); + fPipeline->CheckLayerTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -202,7 +202,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode continue; } - 
fPipeline->CheckTextureRef(layer); + fPipeline->CheckLayerTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -243,7 +243,7 @@ void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encode if (!layer) { return; } - fPipeline->CheckTextureRef(layer); + fPipeline->CheckLayerTextureRef(layer); // Load the image plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -329,7 +329,7 @@ const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerIn void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap) { - fPipeline->CheckTextureRef(layer); + fPipeline->CheckLayerTextureRef(layer); plBitmap* texture = layer->GetTexture(); if (texture != nullptr && encoder) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 98cfd6984e..fb493014ca 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1353,7 +1353,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons return false; } - CheckTextureRef(layer); + CheckLayerTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -2561,6 +2561,11 @@ int plMetalPipeline::ISetNumActivePiggyBacks() return fActivePiggyBacks = std::min(static_cast(fMaxPiggyBacks), fPiggyBackStack.size()); } +struct plAVTexVert { + simd_float2 fPos; + simd_float2 fUv; +}; + void plMetalPipeline::IPreprocessAvatarTextures() { plProfile_Set(AvRTPoolUsed, fClothingOutfits.size()); @@ -2573,25 +2578,14 @@ void plMetalPipeline::IPreprocessAvatarTextures() if (fClothingOutfits.size() == 0) return; - - static float kIdentityMatrix[16] = { - 1.0f, 0.0f, 0.0f, 0.0f, - 0.0f, 1.0f, 0.0f, 0.0f, - 0.0f, 0.0f, 1.0f, 0.0f, - 0.0f, 0.0f, 0.0f, 1.0f - }; - - 
//glUniformMatrix4fv(mRef->uMatrixProj, 1, GL_TRUE, kIdentityMatrix); - //glUniformMatrix4fv(mRef->uMatrixW2C, 1, GL_TRUE, kIdentityMatrix); - //glUniformMatrix4fv(mRef->uMatrixC2W, 1, GL_TRUE, kIdentityMatrix); - //glUniformMatrix4fv(mRef->uMatrixL2W, 1, GL_TRUE, kIdentityMatrix); + + plMipmap *itemBufferTex = nullptr; for (size_t oIdx = 0; oIdx < fClothingOutfits.size(); oIdx++) { plClothingOutfit* co = fClothingOutfits[oIdx]; if (co->fBase == nullptr || co->fBase->fBaseTexture == nullptr) continue; -#if 0 plRenderTarget* rt = plRenderTarget::ConvertNoRef(co->fTargetLayer->GetTexture()); if (rt != nullptr && co->fDirtyItems.Empty()) // we've still got our valid RT from last frame and we have nothing to do. @@ -2601,17 +2595,92 @@ void plMetalPipeline::IPreprocessAvatarTextures() rt = IGetNextAvRT(); co->fTargetLayer->SetTexture(rt); } -#endif - //PushRenderTarget(rt); + PushRenderTarget(rt); + fDevice.CurrentRenderCommandEncoder()->setViewport({0, 0, static_cast(rt->GetWidth()), static_cast(rt->GetHeight()), 0.f, 1.f}); + + static MTL::RenderPipelineState* baseAvatarRenderState = nullptr; + static MTL::RenderPipelineState* avatarRenderState = nullptr; + + if (!baseAvatarRenderState) { + //This is a bit of a hack, this really should be part of the plMetalDevice's function map. + //But that hash map assumes that it follows the vertex arrangement of the models. + //After a refactor, this function creation should go there. 
+ MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init()->autorelease(); + MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary()->autorelease(); + + MTL::Function* vertFunction = library->newFunction(NS::MakeConstantString("PreprocessAvatarVertexShader"))->autorelease(); + MTL::Function* fragFunction = library->newFunction(NS::MakeConstantString("PreprocessAvatarFragmentShader"))->autorelease(); + + descriptor->setVertexFunction(vertFunction); + descriptor->setFragmentFunction(fragFunction); + + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); + vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(0)->setBufferIndex(0); + vertexDescriptor->attributes()->object(0)->setOffset(0); + vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(1)->setBufferIndex(1); + vertexDescriptor->attributes()->object(1)->setOffset(sizeof(float) * 2); + + vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 4); + vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 4); + + descriptor->setVertexDescriptor(vertexDescriptor); + + descriptor->colorAttachments()->object(0)->setBlendingEnabled(false); + descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + NS::Error* error = nullptr; + baseAvatarRenderState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error); + + descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + 
descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + avatarRenderState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error); + } - // HACK HACK HACK - co->fTargetLayer->SetTexture(co->fBase->fBaseTexture); + float uOff = 0.5f / rt->GetWidth(); + float vOff = 0.5f / rt->GetHeight(); + + plClothingLayout *layout = plClothingMgr::GetClothingMgr()->GetLayout(co->fBase->fLayoutName); - // TODO: Actually render to the render target + for (plClothingItem *item : co->fItems) + { + + for (size_t j = 0; j < item->fElements.size(); j++) + { + for (int k = 0; k < plClothingElement::kLayerMax; k++) + { + if (item->fTextures[j][k] == nullptr) + continue; + + itemBufferTex = item->fTextures[j][k]; + hsColorRGBA tint = co->GetItemTint(item, k); + if (k >= plClothingElement::kLayerSkinBlend1 && k <= plClothingElement::kLayerSkinLast) + tint.a = co->fSkinBlends[k - plClothingElement::kLayerSkinBlend1]; + + if (k == plClothingElement::kLayerBase) + { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(baseAvatarRenderState); + } + else + { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(avatarRenderState); + } + + float screenW = (float)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f; + float screenH = (float)item->fElements[j]->fHeight / layout->fOrigWidth * 2.f; + float screenX = (float)item->fElements[j]->fXPos / layout->fOrigWidth * 2.f - 1.f; + float screenY = (1.f - (float)item->fElements[j]->fYPos / layout->fOrigWidth) * 2.f - 1.f - screenH; + IDrawClothingQuad(screenX, screenY, screenW, screenH, uOff, vOff, itemBufferTex); + } + } + } - //PopRenderTarget(); - //co->fDirtyItems.Clear(); + PopRenderTarget(); + co->fDirtyItems.Clear(); } fView.fXformResetFlags = fView.kResetAll; @@ -2619,6 +2688,56 @@ void plMetalPipeline::IPreprocessAvatarTextures() fClothingOutfits.swap(fPrevClothingOutfits); } +void plMetalPipeline::IDrawClothingQuad(float x, float y, float w, float h, + float uOff, 
float vOff, plMipmap *tex) +{ + const uint32_t kVSize = sizeof(plAVTexVert); + plMetalTextureRef* ref = (plMetalTextureRef*)tex->GetDeviceRef(); + if (!ref || ref->IsDirty()) + { + CheckTextureRef(tex); + ref = (plMetalTextureRef*)tex->GetDeviceRef(); + } + if (!ref->fTexture) + { + IReloadTexture(tex, ref); + } + hsRefCnt_SafeAssign( fLayerRef[0], ref ); + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 0); + + plAVTexVert ptr[4]; + plAVTexVert vert; + vert.fPos[0] = x; + vert.fPos[1] = y; + vert.fPos[2] = 0.5f; + vert.fUv[0] = uOff; + vert.fUv[1] = 1.f + vOff; + + // P0 + ptr[2] = vert; + + // P1 + ptr[0] = vert; + ptr[0].fPos[0] += w; + ptr[0].fUv[0] += 1.f; + + // P2 + ptr[1] = vert; + ptr[1].fPos[0] += w; + ptr[1].fUv[0] += 1.f; + ptr[1].fPos[1] += h; + ptr[1].fUv[1] -= 1.f; + + // P3 + ptr[3] = vert; + ptr[3].fPos[1] += h; + ptr[3].fUv[1] -= 1.f; + + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(ptr, sizeof(ptr), 0); + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(ptr, sizeof(ptr), 1); + fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveType::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); +} + void plMetalPipeline::FindFragFunction() { MTL::Library *library = fDevice.fMetalDevice->newDefaultLibrary(); @@ -4117,40 +4236,65 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, // CheckTextureRef ////////////////////////////////////////////////////// // Make sure the given layer's texture has background D3D resources allocated. 
-void plMetalPipeline::CheckTextureRef(plLayerInterface* layer) +void plMetalPipeline::CheckLayerTextureRef(plLayerInterface* layer) { plBitmap* bitmap = layer->GetTexture(); if (bitmap) { - plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); + CheckTextureRef(bitmap); + } +} - if (!tRef) { - tRef = new plMetalTextureRef(); +void plMetalPipeline::CheckTextureRef(plBitmap* bitmap) +{ + plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); + + if (!tRef) { + tRef = static_cast(MakeTextureRef(bitmap)); + } + + // If it's dirty, refill it. + if (tRef->IsDirty()) { + IReloadTexture(bitmap, tRef); + } +} - fDevice.SetupTextureRef(layer, bitmap, tRef); - } +hsGDeviceRef *plMetalPipeline::MakeTextureRef(plBitmap* bitmap) +{ + plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); - if (!tRef->IsLinked()) { - tRef->Link(&fTextureRefList); - } + if (!tRef) { + tRef = new plMetalTextureRef(); - // Make sure it has all resources created. - fDevice.CheckTexture(tRef); + fDevice.SetupTextureRef(bitmap, tRef); + } - // If it's dirty, refill it. - if (tRef->IsDirty()) { - plMipmap* mip = plMipmap::ConvertNoRef(bitmap); - if (mip) { - fDevice.MakeTextureRef(tRef, mip); - return; - } + if (!tRef->IsLinked()) { + tRef->Link(&fTextureRefList); + } - plCubicEnvironmap* cubic = plCubicEnvironmap::ConvertNoRef(bitmap); - if (cubic) { - fDevice.MakeCubicTextureRef(tRef, cubic); - return; - } - } + // Make sure it has all resources created. + fDevice.CheckTexture(tRef); + + // If it's dirty, refill it. 
+ if (tRef->IsDirty()) { + IReloadTexture( bitmap, tRef ); + } + return tRef; +} + +void plMetalPipeline::IReloadTexture( plBitmap* bitmap, plMetalTextureRef *ref ) +{ + plMipmap* mip = plMipmap::ConvertNoRef(bitmap); + if (mip) { + fDevice.MakeTextureRef(ref, mip); + return; + } + + plCubicEnvironmap* cubic = plCubicEnvironmap::ConvertNoRef(bitmap); + if (cubic) { + fDevice.MakeCubicTextureRef(ref, cubic); + return; } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 24c114c2a3..5b80105ef3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -140,7 +140,10 @@ class plMetalPipeline : public pl3DPipeline // Create and/or Refresh geometry buffers void CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; void CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) override; - void CheckTextureRef(plLayerInterface* lay) override; + void CheckLayerTextureRef(plLayerInterface* lay) override; + void CheckTextureRef(plBitmap* bitmap); + hsGDeviceRef *MakeTextureRef(plBitmap* bitmap); + void IReloadTexture( plBitmap* bitmap, plMetalTextureRef *ref ); void ISetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalVertexBufferRef* vRef); uint32_t IGetBufferFormatSize( uint8_t format ) const; @@ -164,6 +167,8 @@ class plMetalPipeline : public pl3DPipeline void IDrawPlate(plPlate* plate); void IPreprocessAvatarTextures(); + void IDrawClothingQuad(float x, float y, float w, float h, + float uOff, float vOff, plMipmap *tex); void IClearShadowSlaves(); void IReleaseDynDeviceObjects(); From 4090cd895048f9d76039e266f195c5761d801d5e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 20 Mar 2022 18:59:29 -0700 Subject: [PATCH 012/165] Fixing missing license headers --- .../pfMetalPipeline/ShaderSrc/Avatar.metal | 47 ++++++++++++++++--- 
.../pfMetalPipeline/plMetalPipelineState.cpp | 47 ++++++++++++++++--- .../pfMetalPipeline/plMetalPipelineState.h | 47 ++++++++++++++++--- 3 files changed, 123 insertions(+), 18 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal index 3d4a7829c8..440159b02b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal @@ -1,9 +1,44 @@ -// -// Avatar.metal -// plClient -// -// Created by Colin Cornaby on 3/2/22. -// +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ #include using namespace metal; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 7f3b78fc3f..6b3677decd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -1,9 +1,44 @@ -// -// plMetalPipelineState.cpp -// plPipeline -// -// Created by Colin Cornaby on 3/10/22. -// +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ #include "plMetalPipelineState.h" #include "plDrawable/plGBufferGroup.h" diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index d3edd90feb..09dc1d4a68 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -1,9 +1,44 @@ -// -// plMetalPipelineState.hpp -// plPipeline -// -// Created by Colin Cornaby on 3/10/22. 
-// +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ #ifndef plMetalPipelineState_hpp #define plMetalPipelineState_hpp From 2826d1a47aefab75279dd879b5ae36736fb5de24 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 20 Mar 2022 23:06:18 -0700 Subject: [PATCH 013/165] Fixes for clothing rendering --- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index fb493014ca..07ddd56862 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2562,8 +2562,8 @@ int plMetalPipeline::ISetNumActivePiggyBacks() } struct plAVTexVert { - simd_float2 fPos; - simd_float2 fUv; + float fPos[2]; + float fUv[2]; }; void plMetalPipeline::IPreprocessAvatarTextures() @@ -2620,11 +2620,10 @@ void plMetalPipeline::IPreprocessAvatarTextures() vertexDescriptor->attributes()->object(0)->setBufferIndex(0); vertexDescriptor->attributes()->object(0)->setOffset(0); vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); - vertexDescriptor->attributes()->object(1)->setBufferIndex(1); + vertexDescriptor->attributes()->object(1)->setBufferIndex(0); vertexDescriptor->attributes()->object(1)->setOffset(sizeof(float) * 2); vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 4); - vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 4); descriptor->setVertexDescriptor(vertexDescriptor); @@ -2709,7 +2708,6 @@ void plMetalPipeline::IDrawClothingQuad(float x, float y, float w, float h, plAVTexVert vert; vert.fPos[0] = x; vert.fPos[1] = y; - vert.fPos[2] = 0.5f; vert.fUv[0] = uOff; vert.fUv[1] = 1.f + vOff; @@ -2734,7 +2732,6 @@ void plMetalPipeline::IDrawClothingQuad(float x, float y, float w, float h, ptr[3].fUv[1] -= 1.f; 
fDevice.CurrentRenderCommandEncoder()->setVertexBytes(ptr, sizeof(ptr), 0); - fDevice.CurrentRenderCommandEncoder()->setVertexBytes(ptr, sizeof(ptr), 1); fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveType::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } From b5fb8bb147c02772eed5da94655382dde55910c0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 21 Mar 2022 22:15:56 -0700 Subject: [PATCH 014/165] Initial version of Metal device enumeration --- Sources/Plasma/Apps/plClient/plClient.cpp | 2 +- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 1 + .../pfMetalPipeline/plMetalEnumerate.h | 13 +++ .../pfMetalPipeline/plMetalEnumerate.mm | 92 +++++++++++++++++++ .../pfMetalPipeline/plMetalPipeline.cpp | 2 + .../pfMetalPipeline/plMetalPipeline.h | 13 +++ 6 files changed, 122 insertions(+), 1 deletion(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm diff --git a/Sources/Plasma/Apps/plClient/plClient.cpp b/Sources/Plasma/Apps/plClient/plClient.cpp index 64915060b5..2ef6ae8a38 100644 --- a/Sources/Plasma/Apps/plClient/plClient.cpp +++ b/Sources/Plasma/Apps/plClient/plClient.cpp @@ -433,7 +433,7 @@ plPipeline* plClient::ICreatePipeline(hsWindowHndl disp, hsWindowHndl hWnd, cons #endif #ifdef PLASMA_PIPELINE_METAL - //if (renderer == hsG3DDeviceSelector::kDevTypeOpenGL) + if (renderer == hsG3DDeviceSelector::kDevTypeMetal) return new plMetalPipeline(disp, hWnd, devMode); #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index d8af9bfcf4..4552e1cabb 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -19,6 +19,7 @@ set(pfMetalPipeline_SOURCES plMetalShader.cpp plMetalFragmentShader.cpp plMetalVertexShader.cpp + plMetalEnumerate.mm ) 
set(pfMetalPipeline_HEADERS diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h new file mode 100644 index 0000000000..3628b91368 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h @@ -0,0 +1,13 @@ +// +// plMetalEnumerate.hpp +// pfMetalPipeline +// +// Created by Colin Cornaby on 3/20/22. +// + +#ifndef plMetalEnumerate_hpp +#define plMetalEnumerate_hpp + +#include + +#endif /* plMetalEnumerate_hpp */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm new file mode 100644 index 0000000000..b86372de01 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm @@ -0,0 +1,92 @@ + +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include "HeadSpin.h" + +#include + +#include + +#include "plMetalPipeline.h" +#include + +void plMetalEnumerate::Enumerate(std::vector& records) +{ + //For now - just use the default device. If there is a high power discrete device - this will spin it up. + //This will also automatically pin us to an eGPU if present and the user has configured us to use it. 
+ MTL::Device* device = MTL::CreateSystemDefaultDevice(); + + if (device) { + hsG3DDeviceRecord devRec; + devRec.SetG3DDeviceType(hsG3DDeviceSelector::kDevTypeMetal); + devRec.SetDriverName("Metal"); + devRec.SetDeviceDesc(device->name()->utf8String()); + //Metal has ways to query capabilities, but doesn't expose a flat version + //Populate with the OS version + @autoreleasepool { + NSProcessInfo *processInfo = [NSProcessInfo processInfo]; + NSOperatingSystemVersion version = processInfo.operatingSystemVersion; + NSString *versionString = [NSString stringWithFormat:@"%li.%li.%li", (long)version.majorVersion, (long)version.minorVersion, version.patchVersion]; + devRec.SetDriverVersion([versionString cStringUsingEncoding:NSUTF8StringEncoding]); + } + devRec.SetDriverDesc(device->name()->utf8String()); + + devRec.SetCap(hsG3DDeviceSelector::kCapsMipmap); + devRec.SetCap(hsG3DDeviceSelector::kCapsPerspective); + devRec.SetCap(hsG3DDeviceSelector::kCapsCompressTextures); + devRec.SetCap(hsG3DDeviceSelector::kCapsDoesSmallTextures); + devRec.SetCap(hsG3DDeviceSelector::kCapsPixelShader); + devRec.SetCap(hsG3DDeviceSelector::kCapsHardware); + + devRec.SetLayersAtOnce(8); + + // Just make a fake mode so the device selector will let it through + hsG3DDeviceMode devMode; + devMode.SetWidth(hsG3DDeviceSelector::kDefaultWidth); + devMode.SetHeight(hsG3DDeviceSelector::kDefaultHeight); + devMode.SetColorDepth(hsG3DDeviceSelector::kDefaultDepth); + devRec.GetModes().emplace_back(devMode); + + records.emplace_back(devRec); + } +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 07ddd56862..52f4a339ac 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -124,6 +124,8 @@ plProfile_CreateCounter("AvRTPoolRes", "PipeC", AvRTPoolRes); plProfile_CreateCounter("AvRTShrinkTime", "PipeC", 
AvRTShrinkTime); plProfile_CreateCounter("NumSkin", "PipeC", NumSkin); +plMetalEnumerate plMetalPipeline::enumerator; + class plRenderTriListFunc : public plRenderPrimFunc { protected: diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 5b80105ef3..1e7be6244e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -59,6 +59,17 @@ class plShadowCaster; const uint kMaxSkinWeightsPerMaterial = 3; +class plMetalEnumerate +{ +public: + plMetalEnumerate() { + hsG3DDeviceSelector::AddDeviceEnumerator(&plMetalEnumerate::Enumerate); + } + +private: + static void Enumerate(std::vector& records); +}; + //// Helper Classes /////////////////////////////////////////////////////////// //// The RenderPrimFunc lets you have one function which does a lot of stuff @@ -238,6 +249,8 @@ class plMetalPipeline : public pl3DPipeline void PushCurrentLightSources(); void PopCurrentLightSources(); std::vector fLightSourceStack; + + static plMetalEnumerate enumerator; }; #endif // _plGLPipeline_inc_ From 62bc4d0ce29d19adb3789dd3434785ed75978f85 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 21 Mar 2022 22:18:01 -0700 Subject: [PATCH 015/165] Fixing CheckTextureRef Thats a virtual interface, can't just rename that. Ooops! 
--- .../pfMetalPipeline/plMetalMaterialShaderRef.cpp | 12 ++++++------ .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 4 ++-- .../FeatureLib/pfMetalPipeline/plMetalPipeline.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 8f49ef542d..a58d1f0cfc 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -105,7 +105,7 @@ void plMetalMaterialShaderRef::CheckMateralRef() continue; } - fPipeline->CheckLayerTextureRef(layer); + fPipeline->CheckTextureRef(layer); } } } @@ -121,7 +121,7 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en continue; } - fPipeline->CheckLayerTextureRef(layer); + fPipeline->CheckTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -175,7 +175,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode continue; } - fPipeline->CheckLayerTextureRef(layer); + fPipeline->CheckTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -202,7 +202,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode continue; } - fPipeline->CheckLayerTextureRef(layer); + fPipeline->CheckTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -243,7 +243,7 @@ void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encode if (!layer) { return; } - fPipeline->CheckLayerTextureRef(layer); + fPipeline->CheckTextureRef(layer); // Load the image plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -329,7 +329,7 @@ const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerIn void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t 
offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap) { - fPipeline->CheckLayerTextureRef(layer); + fPipeline->CheckTextureRef(layer); plBitmap* texture = layer->GetTexture(); if (texture != nullptr && encoder) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 52f4a339ac..007a74cf82 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1355,7 +1355,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons return false; } - CheckLayerTextureRef(layer); + CheckTextureRef(layer); plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); @@ -4235,7 +4235,7 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, // CheckTextureRef ////////////////////////////////////////////////////// // Make sure the given layer's texture has background D3D resources allocated. 
-void plMetalPipeline::CheckLayerTextureRef(plLayerInterface* layer) +void plMetalPipeline::CheckTextureRef(plLayerInterface* layer) { plBitmap* bitmap = layer->GetTexture(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 1e7be6244e..b7682f7f18 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -151,7 +151,7 @@ class plMetalPipeline : public pl3DPipeline // Create and/or Refresh geometry buffers void CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; void CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) override; - void CheckLayerTextureRef(plLayerInterface* lay) override; + void CheckTextureRef(plLayerInterface* lay) override; void CheckTextureRef(plBitmap* bitmap); hsGDeviceRef *MakeTextureRef(plBitmap* bitmap); void IReloadTexture( plBitmap* bitmap, plMetalTextureRef *ref ); From b6397a4b89def5116a13f044e389e1e03933dcfa Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 21 Mar 2022 22:20:17 -0700 Subject: [PATCH 016/165] Adding Metal pipeline as friend to AvatarClothing --- Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h b/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h index f420197347..e5599bd0f5 100644 --- a/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h +++ b/Sources/Plasma/PubUtilLib/plAvatar/plAvatarClothing.h @@ -64,6 +64,7 @@ class plArmatureMod; class plSharedMesh; class plStateDataRecord; class plDXPipeline; +class plMetalPipeline; struct plClothingItemOptions { @@ -163,6 +164,7 @@ class plClothingBase : public hsKeyedObject class plClothingOutfit : public plSynchedObject { friend class plDXPipeline; + friend class plMetalPipeline; public: plArmatureMod *fAvatar; From b889050c778eca7c33b2d06ed1e17291520172cc Mon Sep 17 
00:00:00 2001 From: Colin Cornaby Date: Mon, 21 Mar 2022 22:20:54 -0700 Subject: [PATCH 017/165] Adding Metal device picking --- .../PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp | 10 +++++++++- .../Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp index 75d78c0baf..402f2cea42 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp +++ b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.cpp @@ -368,6 +368,7 @@ bool hsG3DDeviceSelector::GetRequested(hsG3DDeviceModeRecord *dmr, uint32_t devT hsG3DDeviceRecord* iTnL = nullptr; hsG3DDeviceRecord* iD3D = nullptr; + hsG3DDeviceRecord* iMetal = nullptr; hsG3DDeviceRecord* iOpenGL = nullptr; hsG3DDeviceRecord* device = nullptr; @@ -396,14 +397,21 @@ bool hsG3DDeviceSelector::GetRequested(hsG3DDeviceModeRecord *dmr, uint32_t devT if (iOpenGL == nullptr || force) iOpenGL = &record; break; + + case kDevTypeMetal: + if (iMetal == nullptr || force) + iMetal = &record; + break; } } - // Pick a default device (Priority D3D T&L, D3D HAL, OpenGL) + // Pick a default device (Priority D3D T&L, D3D HAL, Metal, OpenGL) if (iTnL != nullptr) device = iTnL; else if (iD3D != nullptr) device = iD3D; + else if (iMetal != nullptr) + device = iMetal; else if (iOpenGL != nullptr) device = iOpenGL; else diff --git a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h index ccd6ee77c8..e0760deca5 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h +++ b/Sources/Plasma/PubUtilLib/plPipeline/hsG3DDeviceSelector.h @@ -269,6 +269,7 @@ class hsG3DDeviceSelector : public hsRefCnt kDevTypeUnknown = 0, kDevTypeDirect3D, kDevTypeOpenGL, + kDevTypeMetal, kNumDevTypes }; From 702262babf663a0bd1098dd4ba92d5de6b9e19ab Mon Sep 17 00:00:00 2001 From: Colin 
Cornaby Date: Tue, 22 Mar 2022 22:38:08 -0700 Subject: [PATCH 018/165] Fixing clear color Plasma is clearing the main drawable before it does offscreen drawing. In Metal, clearing and beginning drawing are linked. Need to cache the clear command until the main drawable pass begins. Strangely enough offscreen buffers like the shadow buffer still aren't clearing. This doesn't match what I've seen from the DirectX branch. Needs further investigation. --- .../pfMetalPipeline/plMetalDevice.cpp | 82 +++++++++++++------ .../pfMetalPipeline/plMetalDevice.h | 11 +-- .../pfMetalPipeline/plMetalPipeline.cpp | 8 +- 3 files changed, 67 insertions(+), 34 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 6a4e7e3973..2b656c80ed 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -112,13 +112,27 @@ void plMetalDevice::Shutdown() void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth) { + //Plasma may clear a target and draw at different times. + //This is specifically trouble with the drawable clear + //Plasma might clear the drawable, and then go off and do + //off screen stuff. Metal doesn't work that way, we need to + //draw and clear at the same time. So if it's a clear for the + //current drawable, remember that and perform the clear when + //we're actually drawing to screen. 
if (shouldClearColor) { - fClearColor = clearColor; - } - fShouldClearColor = shouldClearColor; - - if (shouldClearDepth) { - fClearDepth = clearDepth; + if (fCurrentRenderTarget) { + fClearRenderTargetColor = clearColor; + fShouldClearRenderTarget = shouldClearColor; + if (shouldClearDepth) { + fClearRenderTargetDepth = clearDepth; + } + } else { + fClearDrawableColor = clearColor; + fShouldClearDrawable = shouldClearColor; + if (shouldClearDepth) { + fClearDrawableDepth = clearDepth; + } + } } if (fCurrentRenderTargetCommandEncoder) { @@ -150,25 +164,33 @@ void plMetalDevice::BeginNewRenderPass() { MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); - renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearColor.x, fClearColor.y, fClearColor.z, fClearColor.w)); - if (fShouldClearColor) { - renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); - } else { - renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); - } if (fCurrentRenderTarget) { + renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearRenderTargetColor.x, fClearRenderTargetColor.y, fClearRenderTargetColor.z, fClearRenderTargetColor.w)); + if (fShouldClearRenderTarget) { + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); + } else { + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); + } + if ( fCurrentRenderTarget->GetZDepth() ) { plMetalRenderTargetRef* deviceTarget= (plMetalRenderTargetRef *)fCurrentRenderTarget->GetDeviceRef(); renderPassDescriptor->depthAttachment()->setTexture(deviceTarget->fDepthBuffer); - 
renderPassDescriptor->depthAttachment()->setClearDepth(fClearDepth); + renderPassDescriptor->depthAttachment()->setClearDepth(fClearRenderTargetDepth); renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); } fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } else { + renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearDrawableColor.x, fClearDrawableColor.y, fClearDrawableColor.z, fClearDrawableColor.w)); + if (fShouldClearDrawable) { + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionClear); + } else { + renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); + } + renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); - renderPassDescriptor->depthAttachment()->setClearDepth(fClearDepth); + renderPassDescriptor->depthAttachment()->setClearDepth(fClearDrawableDepth); renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); @@ -196,11 +218,11 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) fCurrentRenderTarget = target; - if ( fCurrentRenderTarget && fShouldClearColor == false ) { + if ( fCurrentRenderTarget && fShouldClearRenderTarget == false ) { // clear if a clear color wasn't already set - fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); - fShouldClearColor = true; - fClearDepth = 1.0; + fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearRenderTarget = true; + fClearRenderTargetDepth = 1.0; } if(fCurrentRenderTarget) { @@ -233,7 +255,8 @@ plMetalDevice::plMetalDevice() 
fCurrentRenderTarget(nullptr), fNewPipelineStateMap() { - fClearColor = {0.0, 0.0, 0.0, 1.0}; + fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; + fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; fMetalDevice = MTL::CreateSystemDefaultDevice(); fCommandQueue = fMetalDevice->newCommandQueue(); @@ -914,9 +937,12 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentDrawable->release(); fCurrentDrawable = nil; - fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); - fShouldClearColor = false; - fClearDepth = 1.0; + fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fClearDrawableColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearRenderTarget = false; + fShouldClearDrawable = false; + fClearRenderTargetDepth = 1.0; + fClearDrawableDepth = 1.0; } std::size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept @@ -938,9 +964,15 @@ MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() if (!fCurrentRenderTargetCommandEncoder) { BeginNewRenderPass(); - fClearColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); - fShouldClearColor = false; - fClearDepth = 1.0; + if (fCurrentRenderTarget) { + fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearRenderTarget = false; + fClearRenderTargetDepth = 1.0; + } else { + fClearDrawableColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); + fShouldClearDrawable = false; + fClearDrawableDepth = 1.0; + } } return fCurrentRenderTargetCommandEncoder; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 9b09926197..d31e7e6ceb 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -149,8 +149,6 @@ class plMetalDevice void SetWorldToCameraMatrix(const hsMatrix44& src); void SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder = true); - void 
SetClearColor(simd_float4 clearColor) { fClearColor = clearColor; }; - void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice); uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap); @@ -221,9 +219,12 @@ class plMetalDevice MTL::Texture* fCurrentFragmentOutputTexture; CA::MetalDrawable* fCurrentDrawable; MTL::PixelFormat fCurrentDepthFormat; - simd_float4 fClearColor; - bool fShouldClearColor; - float fClearDepth; + simd_float4 fClearRenderTargetColor; + simd_float4 fClearDrawableColor; + bool fShouldClearRenderTarget; + bool fShouldClearDrawable; + float fClearRenderTargetDepth; + float fClearDrawableDepth; plRenderTarget* fCurrentRenderTarget; void BeginNewRenderPass(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 007a74cf82..fe783be840 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1462,10 +1462,10 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); const MTL::RenderPipelineState *pipelineState = pipeline->pipelineState;*/ - if(fCurrentPipelineState != pipelineState) { + //if(fCurrentPipelineState != pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); fCurrentPipelineState = pipelineState; - } + //} } return true; @@ -3748,10 +3748,10 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con } plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); - if(fCurrentPipelineState != 
linkedPipeline->pipelineState) { + //if(fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fCurrentPipelineState = linkedPipeline->pipelineState; - } + //} int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex); From 45b5013b9dca4b3470078b2fae070c6e205eef15 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 24 Mar 2022 22:16:35 -0700 Subject: [PATCH 019/165] Hacking in opacity support for plates --- .../FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal | 4 +++- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal index 5e38fca6eb..db6e2953b3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal @@ -89,7 +89,8 @@ vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]], fragment float4 fragmentShader(ColorInOut in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - device plMetalFragmentShaderArgumentBuffer & fragmentShaderArgs [[ buffer(BufferIndexFragArgBuffer) ]], + constant plMetalFragmentShaderArgumentBuffer & fragmentShaderArgs [[ buffer(BufferIndexFragArgBuffer) ]], + constant float & alpha [[ buffer(6) ]], texture2d colorMap [[ texture(Texture) ]]) { constexpr sampler colorSampler(mip_filter::linear, @@ -97,6 +98,7 @@ fragment float4 fragmentShader(ColorInOut in [[stage_in]], min_filter::linear); half4 colorSample = colorMap.sample(colorSampler, in.texCoord.xy); + colorSample.a *= alpha; return float4(colorSample); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index fe783be840..1a005f6f90 
100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2341,6 +2341,8 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); + float alpha = material->GetLayer(0)->GetOpacity(); + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&alpha, sizeof(float), 6); fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState); fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); From 2f7e6cad03796694287ca9985958bb5df6665563 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 27 Mar 2022 00:01:56 -0700 Subject: [PATCH 020/165] Fixing self shadow bug, removing LUT Avatar was self shadowing because the shadow map reflected the avatar state from the previous frame. The LUT for the shadow maps was a workaround to turn the x position into a color value. Removing the LUT, since we can use that value directly in Metal instead of needing a texture as a go-between. This also should slightly improve shadow quality over DX. 
Additional LUT removal --- .../ShaderSrc/FixedPipelineShaders.metal | 16 ++-- .../pfMetalPipeline/plMetalPipeline.cpp | 79 +++---------------- .../pfMetalPipeline/plMetalPipeline.h | 3 +- 3 files changed, 21 insertions(+), 77 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 3cab28d062..401f613bd2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -648,19 +648,17 @@ vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], return out; } -fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]], - texture2d colorMap [[ texture(0) ]]) +fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]]) { //D3DTTFF_COUNT3, D3DTSS_TCI_CAMERASPACEPOSITION - short currentAlpha = colorMap.sample(colorSamplers[3], float2(in.texCoord1.xy)).a; + const half currentAlpha = in.texCoord1.x; - return half4(1.0h, 1.0h, 1.0h, half(currentAlpha)/255.0h); + return half4(1.0h, 1.0h, 1.0h, currentAlpha); } fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], texture2d texture [[ texture(16) ]], - texture2d LUT [[ texture(17) ]], constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(BufferIndexShadowCastFragArgBuffer) ]], FragmentShaderArguments layers, constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowAlphaSrc) ]]) @@ -673,21 +671,21 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], currentColor.rgb *= in.vtxColor.rgb; const float2 LUTCoords = in.texCoord2.xy; - const half4 LUTColor = half4(LUT.sample(colorSamplers[3], LUTCoords))/255.0h; + const half4 LUTColor = half4(LUTCoords.x); - currentColor.rgb = (1.0 - LUTColor.rgb) * currentColor.rgb; + currentColor.rgb = (1.0h - LUTColor.rgb) * currentColor.rgb; 
currentColor.a = LUTColor.a - currentColor.a; //only possible alpha sources are layers 0 or 1 if(alphaSrc == 0) { - half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[0].sampleType, miscFlags[2], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0]); + half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[0].sampleType, miscFlags[1], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0]); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; } else if(alphaSrc == 1) { - half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[1].sampleType, miscFlags[2], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], (&layers.cubicTextures)[1]); + half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[1].sampleType, miscFlags[1], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], (&layers.cubicTextures)[1]); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 1a005f6f90..efc6b16ac3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -891,7 +891,7 @@ void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vectorsetRenderPipelineState(baseAvatarRenderState); + if(fCurrentPipelineState != baseAvatarRenderState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(baseAvatarRenderState); + fCurrentPipelineState = baseAvatarRenderState; + } } else { - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(avatarRenderState); + if(fCurrentPipelineState != avatarRenderState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(avatarRenderState); + fCurrentPipelineState = 
avatarRenderState; + } } float screenW = (float)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f; @@ -2954,8 +2960,8 @@ bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) drawable->PrepForRender( this ); // Do any software skinning. - //if( !ISoftwareVertexBlend(drawable, visList) ) - // return false; + if( !ISoftwareVertexBlend(drawable, visList) ) + return false; } } @@ -3064,50 +3070,6 @@ void plMetalPipeline::IPreprocessShadows() plProfile_EndTiming(PrepShadows); } -// IGetULutTextureRef /////////////////////////////////////////////////////////// -// The ULut just translates a U coordinate in range [0..1] into -// color and alpha of U * 255.9f. We just have the one we keep -// lying around. -plMetalTextureRef* plMetalPipeline::IGetULutTextureRef() -{ - const int width = 256; - const int height = 1; - if( !fULutTextureRef ) - { - uint32_t* tData = new uint32_t[width * height]; - - uint32_t* pData = tData; - int j; - for( j = 0; j < height; j++ ) - { - int i; - for( i = 0; i < width; i++ ) - { - *pData = (i << 24) - | (i << 16) - | (i << 8) - | (i << 0); - pData++; - } - } - - MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Uint, width, height, false); - textureDescriptor->setResourceOptions(MTL::ResourceStorageModeManaged | MTL::CPUCacheModeWriteCombined); - MTL::Buffer* buffer = fDevice.fMetalDevice->newBuffer(tData, width * height * sizeof(uint32_t), MTL::ResourceStorageModeManaged | MTL::CPUCacheModeWriteCombined); - buffer->didModifyRange(NS::Range::Make(0, buffer->length())); - MTL::Texture* texture = buffer->newTexture(textureDescriptor, 0, width * 4); - plMetalTextureRef* ref = new plMetalTextureRef(); - ref->fTexture = texture; - - ref->Link(&fTextureRefList); - - fULutTextureRef = ref; - - buffer->release(); - } - return fULutTextureRef; -} - // IPushShadowCastState //////////////////////////////////////////////////////////////////////////////// // Push all the state 
necessary to start rendering this shadow map, but independent of the // actual shadow caster to be rendered into the map. @@ -3123,7 +3085,6 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) return false; // Set texture to U_LUT - plMetalTextureRef* ref = IGetULutTextureRef(); fCurrentRenderPassUniforms->specularSrc = 0.0; //if( !ref->fTexture ) @@ -3141,7 +3102,6 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) // Push the shadow map as the current render target PushRenderTarget(renderTarg); - fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 0); // We'll be rendering the light space distance to the span fragment into // alpha (color is white), so our camera space position, transformed into light space @@ -3209,9 +3169,6 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE); fLayerState[1].fBlendFlags = uint32_t(-1);*/ - hsRefCnt_SafeAssign( fLayerRef[0], ref ); - fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, Texture); - //fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE); //fD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE); //fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO); @@ -3693,7 +3650,7 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp static hsMatrix44 emptyMatrix; hsMatrix44 m = emptyMatrix; - ISetupTransforms(drawable, span, NULL, m); + ISetupTransforms(drawable, span, m); bool flip = slave->ReverseCull(); ISetCullMode(flip); @@ -3746,7 +3703,6 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con passDescription.numLayers = 3; if (mat->GetNumLayers()>1) { passDescription.Populate(mat->GetLayer(1), 2); - passDescription.numLayers = 3; } plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); @@ -3831,15 +3787,6 
@@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; } - plMetalTextureRef* ref = IGetULutTextureRef(); - if( !ref->fTexture ) - { - //if( ref->fData ) - // IReloadTexture(ref); - } - hsRefCnt_SafeAssign(fLayerRef[1], ref); - fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 17); - int numUVSrcs = 2; int layerIndex = -1; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index b7682f7f18..fc411101f6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -136,7 +136,7 @@ class plMetalPipeline : public pl3DPipeline int GetMaxAntiAlias(int Width, int Height, int ColorDepth) override; void ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync = false) override; void RenderSpans(plDrawableSpans* ice, const std::vector& visList) override; - void ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, plMetalMaterialShaderRef* mRef, hsMatrix44& lastL2W); + void ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W); bool ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase); bool IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef); void IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, @@ -221,7 +221,6 @@ class plMetalPipeline : public pl3DPipeline bool IPopShadowCastState(plShadowSlave* slave); void IResetRenderTargetPools(); void IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span); - plMetalTextureRef* IGetULutTextureRef(); plMetalTextureRef* fULutTextureRef; void ISetupShadowLight(plShadowSlave* slave); void IMakeRenderTargetPools(); From 207978b8455ae3f345fc60be3e1acf845a812948 Mon 
Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 27 Mar 2022 13:36:33 -0700 Subject: [PATCH 021/165] Initial version of WaveDec1Lay_7 Ahnonay is loading now. But there are clear water rendering issues in Ahnonay that will need to be resolved. --- .../ShaderSrc/WaveDec1Lay_7.metal | 281 ++++++++++++++++++ .../pfMetalPipeline/plMetalPipelineState.cpp | 14 + 2 files changed, 295 insertions(+) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal new file mode 100644 index 0000000000..7ba5f6daf4 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -0,0 +1,281 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include <metal_stdlib> +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct { + matrix_float4x4 WorldToNDC; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 Scrunch; // UNUSED + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 Tex0_Row0; + float4 Tex0_Row1; + float4 Tex1_Row0; + float4 Tex1_Row1; + float4 L2WRow0; + float4 L2WRow1; + float4 L2WRow2; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 Bias; // Only using one slot + float4 MatColor; + float4 CameraPos; // Only used by DecalEnv + float4 EnvAdjust; // Only used by DecalEnv + float4 FogSet; + float4 QADirX; + float4 QADirY; + + float4 DirXW; // Only used by DecalEnv + float4 DirYW; // Only used by DecalEnv + float4 WK; // Only used by DecalEnv + float4 DirXSqKW; // Only used by DecalEnv + float4 DirXDirYKW; // Only used by DecalEnv + 
float4 DirYSqKW; // Only used by DecalEnv +} vs_WaveDev1Lay_7Uniforms; + +typedef struct { + float4 position [[position]]; + half4 c0; + float4 texCoord0; + half4 fog; +} vs_WaveDev1Lay_7InOut; + +vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], + constant vs_WaveDev1Lay_7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + vs_WaveDev1Lay_7InOut out; + // Store our input position in world space in r6 + float4 worldPosition = float4(0); + worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); + worldPosition.y = dot(float4(in.position, 1.0), uniforms.L2WRow1); + worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); + // Fill out our w (m4x3 doesn't touch w). + worldPosition.w = 1.0; + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = illumination + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. + // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. + // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. 
+ // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c22 = waterlevel + offset + // c23 = (maxAtten - minAtten) / depthFalloff + // c24 = minAtten. + // And in particular: + // c22.w = waterlevel + // c23.w = 1.f; + // c24.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. + float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance += uniforms.DirectionY * worldPosition.yyyy; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + distance += uniforms.PiConsts.zzzz; + distance *= (1.0f/(2.0f * M_PI_F)); + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= (2.0f * M_PI_F); + // dist += -kPi; + distance += -M_PI_F; + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * 
uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). + float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above). + float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = max(filter, uniforms.NumericConsts.xxxx); + filter = min(filter, uniforms.NumericConsts.zzzz); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = float4(0); + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= kAmplitude.xyzw; // Combine? + //METAL NOTE: cosDist is now r7 + cosDist *= uniforms.Amplitude; + // cosDist *= filter; + cosDist *= filter; + // Pos = (in.x + S, in.y + R, r6.z) + // S = sum(k Dir.x A cos()) + // R = sum(k Dir.y A cos()) + // c30 = k Dir.x A + // c31 = k Dir.y A + // S = sum(cosDist * c30); + worldPosition.xy += float2( + dot(cosDist, uniforms.QADirX), + dot(cosDist, uniforms.QADirY) + ); + + // Bias our vert up a bit to compensate for precision errors. 
+ // In particular, our filter coefficients are coming in as + // interpolated bytes, so there's bound to be a lot of slop + // from that. We've got a free slot in c25.x, so we'll use that. + // A better implementation would be to bias and scale our screen + // vert, effectively pushing the vert toward the camera without + // actually moving it, but this is easier and might work just + // as well. + worldPosition.z += uniforms.Bias.x; + + // + // // Transform position to screen + // + // + out.position = worldPosition * uniforms.WorldToNDC; + out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; + + // Output color is vertex green + // Output alpha is vertex red (vtx alpha is used for wave filtering) + // Whole thing modulated by material color/opacity. + + out.c0 = half4(in.color.yyyx) * half4(uniforms.MatColor); + + // Usual texture transform + out.texCoord0.x = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0); + out.texCoord0.y = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1); + out.texCoord0.z = 0.0f; + out.texCoord0.w = 0.0f; + + return out; +} + +fragment half4 ps_CalphaAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], + texture2d texture [[ texture(0) ]]) { + + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + return texture.sample(colorSampler, in.texCoord0.xy) * in.c0; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 6b3677decd..81c9a16460 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -409,6 +409,13 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL: (NS::Error **)nullptr ); break; + case plShaderID::vs_WaveDec1Lay_7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveDec1Lay_7", 
NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; default: hsAssert(0, "unknown shader requested"); } @@ -454,6 +461,13 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MT (NS::Error **)nullptr ); break; + case plShaderID::ps_CbaseAbase: + fragFunction = library->newFunction( + NS::String::string("ps_CbaseAbase", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; default: hsAssert(0, "unknown shader requested"); } From 793e9b99403dd3b3e7a069efbd6c3015012eadc0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 27 Mar 2022 18:33:34 -0700 Subject: [PATCH 022/165] Initial fixes for wave sets --- .../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal | 2 +- .../FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal | 6 ++++-- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index cb995d7023..49ebf22c1a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -372,7 +372,7 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]], texture2d normalMap [[ texture(0) ]], - texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3) ]]) { + texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1) ]]) { // Very simular to ps_WaveFixed.inl. Only the final coloring is different. // Even though so far they are identical. 
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index 803b0d0fb7..55f2dfa3d9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -101,13 +101,15 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], float3 column1 = float3(uniforms.LocalToWorldRow1[0], uniforms.LocalToWorldRow2[0], uniforms.LocalToWorldRow3[0]); float3 column2 = float3(uniforms.LocalToWorldRow1[1], uniforms.LocalToWorldRow2[1], uniforms.LocalToWorldRow3[1]); float3 column3 = float3(uniforms.LocalToWorldRow1[2], uniforms.LocalToWorldRow2[2], uniforms.LocalToWorldRow3[2]); + float3 column4 = float3(uniforms.LocalToWorldRow1[3], uniforms.LocalToWorldRow2[3], uniforms.LocalToWorldRow3[3]); - matrix_float3x3 localToWorld; + matrix_float4x3 localToWorld; localToWorld[0] = column1; localToWorld[1] = column2; localToWorld[2] = column3; + localToWorld[3] = column4; - float4 worldPosition = float4(in.position * localToWorld, uniforms.NumericConsts.z); + float4 worldPosition = float4(localToWorld * float4(in.position, 1.0), 1.0); // diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index efc6b16ac3..fb48673431 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1349,7 +1349,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons work is done. 
*/ - for (size_t i = mRef->GetPassIndex(pass); i < mRef->GetPassIndex(pass) + mRef->fPassLengths[pass]; i++) { + for (size_t i = 0; i < material->GetNumLayers(); i++) { plLayerInterface* layer = material->GetLayer(i); if (!layer) { return false; @@ -1375,7 +1375,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons idOffset = FragmentShaderArgumentAttributeCubicTextures; } - fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i - mRef->GetPassIndex(pass) + idOffset); + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i + idOffset); } } else { //"Fixed" path From 946f10f77e53f51847867568f6b39d93e9c84293 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 27 Mar 2022 20:39:51 -0700 Subject: [PATCH 023/165] Initial fixes for WaveDecEnv --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + .../ShaderSrc/WaveDecEnv.metal | 32 +++++++++---------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 24f1e39c30..e165024bef 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -127,6 +127,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index 49ebf22c1a..1c1b353921 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -313,17 +313,17 @@ 
vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // Okay, got everything we need, construct r1-3 as surface2world*texture2surface. float4 r1, r2, r3 = float4(0); - r1.x = dot(r7, float4(in.texCoord1, 1.0)); - r1.y = dot(r7, float4(in.texCoord2, 1.0)); + r1.x = dot(r7.xyz, in.texCoord1); + r1.y = dot(r7.xyz, in.texCoord2); r1.z = dot(r7, r5); - r2.x = dot(r8, float4(in.texCoord1, 1.0)); - r2.y = dot(r8, float4(in.texCoord2, 1.0)); - r2.z = dot(r8, r5); + r2.x = dot(r8.xyz, in.texCoord1.xyz); + r2.y = dot(r8.xyz, in.texCoord2.xyz); + r2.z = dot(r8.xyz, r5.xyz); - r3.x = dot(r9, float4(in.texCoord1, 1.0)); - r3.y = dot(r9, float4(in.texCoord2, 1.0)); - r3.z = dot(r9, r5); + r3.x = dot(r9.xyz, in.texCoord1.xyz); + r3.y = dot(r9.xyz, in.texCoord2.xyz); + r3.z = dot(r9.xyz, r5.xyz); // Following section is debug only to skip the per-vert tangent space axes. //add r1, c13.zxxx, r7.zzxw; @@ -335,10 +335,10 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // See vs_WaveFixedFin6.inl for derivation of the following float4 r0 = worldPosition - uniforms.CameraPos; - r0 *= rsqrt(dot(r0, r0)); + r0 *= rsqrt(dot(r0.xyz, r0.xyz)); float4 r10 = float4(0); - r10.x = dot(r0, uniforms.EnvAdjust); + r10.x = dot(r0.xyz, uniforms.EnvAdjust.xyz); r10.y = (r10.x * r10.x) - uniforms.EnvAdjust.w; r10.z = (r10.y * rsqrt(r10.y)) + r10.x; @@ -356,14 +356,14 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // Note we're accounting for our environment map being flipped from // D3D (and all rational thought) by putting r2 into UV3 and r3 into UV2. 
r10.w = uniforms.NumericConsts.z; - r10.x = rsqrt(dot(r1, r1)); - out.texCoord0 = r1 * r10.xxxw; + r10.x = rsqrt(dot(r1.xyz, r1.xyz)); + out.texCoord1 = r1 * r10.xxxw; - r10.x = rsqrt(dot(r3, r3)); - out.texCoord1 = r3 * r10.xxxw; + r10.x = rsqrt(dot(r3.xyz, r3.xyz)); + out.texCoord2 = r3 * r10.xxxw; - r10.x = rsqrt(dot(r2, r2)); - out.texCoord2 = r2 * r10.xxxw; + r10.x = rsqrt(dot(r2.xyz, r2.xyz)); + out.texCoord3 = r2 * r10.xxxw; out.c1 = clamp(float4(in.color).yyyx/255.0 * uniforms.MatColor, 0.0, 1.0); From bec6acd8f79137c5741c4472f6817fbf792d16bf Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 28 Mar 2022 22:31:21 -0700 Subject: [PATCH 024/165] Fixes for wave shaders Fixes directly to the WaveDecEnv shader. Minor changes to WaveSet7. Fixes for function hash table confusing different shaders and not tracking blend mode properly. Possibly related to GPU crashing, and fixes blend mode issues with shaders. --- .../ShaderSrc/WaveDecEnv.metal | 40 +++++++++---------- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 5 ++- .../pfMetalPipeline/plMetalPipelineState.h | 11 ++++- 3 files changed, 32 insertions(+), 24 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index 1c1b353921..e8c6cb8417 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -136,7 +136,7 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // dist *= kTwoPi; distance *= uniforms.PiConsts.wwww; // dist += -kPi; - distance += uniforms.PiConsts.zzzz; + distance -= uniforms.PiConsts.zzzz; // // sincos(dist, sinDist, cosDist); @@ -192,9 +192,6 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // r8.z == dampened wave height in world space // r6.z == wave height clamped to never go beneath ground level // - // cosDist *= kAmplitude.xyzw; // 
Combine? - //METAL NOTE: cosDist is now r7 - cosDist *= uniforms.Amplitude; // cosDist *= filter; cosDist *= filter; // Pos = (in.x + S, in.y + R, r6.z) @@ -287,12 +284,12 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], float4 r7 = float4(in.texCoord2, 1.0); float4 r5 = float4(0); - r5.xyz = r7.yzx * in.texCoord3; - r5.xyz = (r7.zxy * -in.texCoord3) + r5.xyz; + r5.xyz = r7.yzx * in.texCoord3.zxy; + r5.xyz = (r7.zxy * -in.texCoord3.yzx) + r5.xyz; // Okay, r1 currently has the vector of cosines, and r2 has vector of sines. // Everything will want that times amplitude, so go ahead and fold that in. - cosDist *= uniforms.Phase; + cosDist *= uniforms.Amplitude; r7.x = dot(sinDist, -uniforms.DirXSqKW); r7.y = dot(sinDist, -uniforms.DirXDirYKW); @@ -302,10 +299,10 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], float4 r8 = float4(0); r8.x = dot(sinDist, -uniforms.DirXDirYKW); r8.y = dot(sinDist, -uniforms.DirYSqKW); - r8.z = dot(cosDist, uniforms.DirYW); + r8.z = dot(cosDist, -uniforms.DirYW); r8.y = r8.y + uniforms.NumericConsts.z; - float4 r9 = float4(0); + float4 r9 = out.position; r9.z = dot(cosDist, -uniforms.WK); r9.x = -r7.z; r9.y = -r8.z; @@ -313,16 +310,16 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // Okay, got everything we need, construct r1-3 as surface2world*texture2surface. 
float4 r1, r2, r3 = float4(0); - r1.x = dot(r7.xyz, in.texCoord1); - r1.y = dot(r7.xyz, in.texCoord2); - r1.z = dot(r7, r5); + r1.x = dot(r7.xyz, in.texCoord2); + r1.y = dot(r7.xyz, in.texCoord3); + r1.z = dot(r7.xyz, r5.xyz); - r2.x = dot(r8.xyz, in.texCoord1.xyz); - r2.y = dot(r8.xyz, in.texCoord2.xyz); + r2.x = dot(r8.xyz, in.texCoord2); + r2.y = dot(r8.xyz, in.texCoord3); r2.z = dot(r8.xyz, r5.xyz); - r3.x = dot(r9.xyz, in.texCoord1.xyz); - r3.y = dot(r9.xyz, in.texCoord2.xyz); + r3.x = dot(r9.xyz, in.texCoord2); + r3.y = dot(r9.xyz, in.texCoord3); r3.z = dot(r9.xyz, r5.xyz); // Following section is debug only to skip the per-vert tangent space axes. @@ -365,7 +362,8 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], r10.x = rsqrt(dot(r2.xyz, r2.xyz)); out.texCoord3 = r2 * r10.xxxw; - out.c1 = clamp(float4(in.color).yyyx/255.0 * uniforms.MatColor, 0.0, 1.0); + float4 matColor = uniforms.MatColor; + out.c1 = clamp(float4(in.color).yyyz/255.0 * matColor, 0.0, 1.0); return out; } @@ -381,9 +379,9 @@ fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]], min_filter::linear, address::repeat); float4 t0 = 2 * normalMap.sample(colorSampler, in.texCoord0.xy) - 0.5; - float u = dot(in.texCoord1, t0); - float v = dot(in.texCoord2, t0); - float w = dot(in.texCoord3, t0); + float u = dot(in.texCoord1.xyz, t0.xyz); + float v = dot(in.texCoord2.xyz, t0.xyz); + float w = dot(in.texCoord3.xyz, t0.xyz); float3 N = float3(u, v, w); float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w); @@ -397,6 +395,6 @@ fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]], // is to multiply t3 by v0 into r0 and we're done. 
float4 out = float4(environmentMap.sample(colorSampler, coord)); out.rgb = (out.rgb * in.c1.rgb); - out.a = t0.x * in.c1.x; + out.a = t0.a * in.c1.a; return out; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index 55f2dfa3d9..2190140299 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -342,13 +342,14 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], // //https://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch01.html - float4 r0; + + float4 r0 = float4(0); { float3 D = r5.xyz; float3 F = uniforms.EnvAdjust.xyz; float G = uniforms.EnvAdjust.w; - float3 t = dot(D, F) + sqrt(pow(dot(D, F), 2) - G);// r10.z = D dot F + SQRT((D dot F)^2 - G) + float3 t = dot(D.xyz, F.xyz) + sqrt(pow(dot(D.xyz, F.xyz), 2) - G);// r10.z = D dot F + SQRT((D dot F)^2 - G) r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 09dc1d4a68..df84dfb006 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -222,7 +222,16 @@ class plMetalDynamicMaterialPipelineState: public plMetalPipelineState { if (!dynamicState) { return false; } - return dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID; + return dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID && dynamicState->fBlendMode == fBlendMode; + } + + size_t GetHash() const override { + std::size_t value = std::hash()(fFragmentShaderID); + value ^= std::hash()(fVertexShaderID); + value ^= std::hash()(fVertexShaderID); + value ^= 
std::hash()(fBlendMode); + + return value; } const MTL::Function* GetVertexFunction(MTL::Library *library) override; From fc72800cca94809f77322671a386e75b68f9603e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 28 Mar 2022 23:51:11 -0700 Subject: [PATCH 025/165] Adjusting Z-Bias Water in Ahnonay uses it, but it wasn't quite enough to prevent all collisions --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index fb48673431..d7a6c204e4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1886,7 +1886,7 @@ void plMetalPipeline::IHandleZMode(hsGMatState flags) } if (flags.fZFlags & hsGMatState::kZIncLayer) { - fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, -1.1, -1.1); + fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, -2.0, -2.0); } else { fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, 0.0, 0.0); } From fdc6d2710b2c7ea9ae8e3d225ad57f0c4b7b0bbd Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 29 Mar 2022 12:53:33 -0700 Subject: [PATCH 026/165] More fixes to wave set shaders --- .../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal | 2 +- .../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal index 7ba5f6daf4..69b4311359 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -259,7 +259,7 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], // Output alpha is vertex red (vtx alpha 
is used for wave filtering) // Whole thing modulated by material color/opacity. - out.c0 = half4(in.color.yyyx) * half4(uniforms.MatColor); + out.c0 = half4(in.color.yyyz)/255.0 * half4(uniforms.MatColor); // Usual texture transform out.texCoord0.x = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index e8c6cb8417..bf19dfee30 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -378,7 +378,7 @@ fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]], mag_filter::linear, min_filter::linear, address::repeat); - float4 t0 = 2 * normalMap.sample(colorSampler, in.texCoord0.xy) - 0.5; + float4 t0 = 2 * (normalMap.sample(colorSampler, in.texCoord0.xy) - 0.5); float u = dot(in.texCoord1.xyz, t0.xyz); float v = dot(in.texCoord2.xyz, t0.xyz); float w = dot(in.texCoord3.xyz, t0.xyz); From 0ef446cc06d169c9c4a7c698cb343a45fe1b1e3a Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 29 Mar 2022 22:24:38 -0700 Subject: [PATCH 027/165] Shader was named wrong, fixing --- .../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal index 69b4311359..80db38f3a4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -270,7 +270,7 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], return out; } -fragment half4 ps_CalphaAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], +fragment half4 ps_CbaseAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], texture2d texture [[ texture(0) ]]) { 
constexpr sampler colorSampler = sampler(mip_filter::linear, From 2b603619fc464c31d2582a1c331df681844c3a51 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 29 Mar 2022 23:01:42 -0700 Subject: [PATCH 028/165] Fixing unintentional overflow Metal was getting a pointer to a uint8_t when it expected a short for the UV count. This was leading to UV count sometimes being read wrong. --- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 81c9a16460..9e11efb97c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -61,7 +61,8 @@ plMetalPipelineState::plMetalPipelineState(plMetalDevice* device, const plMetalV void plMetalPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const { - constants->setConstantValue(&fNumUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); + ushort numUVs = fNumUVs; + constants->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); } size_t plMetalPipelineState::GetHash() const { From be97c7242e839cd8d67dbbfac0c977f750181ac2 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 30 Mar 2022 21:42:22 -0700 Subject: [PATCH 029/165] Adding Wave Rip --- .../pfMetalPipeline/ShaderSrc/WaveRip.metal | 319 ++++++++++++++++++ .../pfMetalPipeline/plMetalPipelineState.cpp | 14 + 2 files changed, 333 insertions(+) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal new file mode 100644 index 0000000000..441ee1cd7b --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal @@ -0,0 +1,319 @@ 
+/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <http://www.gnu.org/licenses/>. + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +#include "ShaderVertex.h" + +typedef struct { + matrix_float4x4 WorldToNDC; + float4 FogSet; + float4 Frequency; + float4 Phase; + float4 Amplitude; + float4 DirectionX; + float4 DirectionY; + float4 QADirX; + float4 QADirY; + float4 Scrunch; + float4 SinConsts; + float4 CosConsts; + float4 PiConsts; + float4 NumericConsts; + float4 CameraPos; + float4 WindRot; + float4 Tex0_Row0; + float4 Tex0_Row1; + float4 Tex0_Row2; + float4 Tex1_Row0; + float4 Tex1_Row1; + float4 Tex1_Row2; + float4 LocalToWorld; + float4 L2WRow0; + float4 L2WRow1; + float4 L2WRow2; + float4 Lengths; + float4 WaterLevel; + float4 DepthFalloff; + float4 MinAtten; + float4 TexConsts; + float4 LifeConsts; + float4 RampBias; +} vs_WaveRip7Uniforms; + +typedef struct { + float4 position [[position]]; + float4 c1; + float4 texCoord0; + float fog; +} waveRipInOut; + +vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], + constant vs_WaveRip7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + waveRipInOut out; + + // Store our input position in world space in r6 + float4 worldPosition = float4(0); + worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); + worldPosition.y = dot(float4(in.position, 1.0), uniforms.L2WRow1); + worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); + // Fill out our w (m4x3 doesn't touch w). + worldPosition.w = 1.0; + + // + + // Input diffuse v5 color is: + // v5.r = overall transparency + // v5.g = illumination + // v5.b = overall wave scaling + // + // v5.a is: + // v5.w = 1/(2.f * edge length) + // So per wave filtering is: + // min(max( (waveLen * v5.wwww) - 1), 0), 1.f); + // So a wave effect starts dying out when the wave is 4 times the sampling frequency, + // and is completely filtered at 2 times sampling frequency. + + // We'd like to make this autocalculated based on the depth of the water. 
+ // The frequency filtering (v5.w) still needs to be calculated offline, because + // it's dependent on edge length, but the first 3 filterings can be calculated + // based on this vertex. + // Basically, we want the transparency, reflection strength, and wave scaling + // to go to zero as the water depth goes to zero. Linear falloffs are as good + // a place to start as any. + // + // depth = waterlevel - r6.z => depth in feet (may be negative) + // depthNorm = depth / depthFalloff => zero at watertable, one at depthFalloff beneath + // atten = minAtten + depthNorm * (maxAtten - minAtten); + // These are all vector ops. + // This provides separate ramp ups for each of the channels (they reach full unfiltered + // values at different depths), but doesn't provide separate controls for where they + // go to zero (they all go to zero at zero depth). For that we need an offset. An offset + // in feet (depth) is probably the most intuitive. So that changes the first calculation + // of depth to: + // depth = waterlevel - r6.z + offset + // = (waterlevel + offset) - r6.z + // And since we only need offsets for 3 channels, we can make the waterlevel constant + // waterlevel[chan] = watertableheight + offset[chan], + // with waterlevel.w = watertableheight. + // + // So: + // c22 = waterlevel + offset + // c23 = (maxAtten - minAtten) / depthFalloff + // c24 = minAtten. + // And in particular: + // c22.w = waterlevel + // c23.w = 1.f; + // c24.w = 0; + // So r4.w is the depth of this vertex in feet. + + // Dot our position with our direction vectors. 
+ float4 distance = uniforms.DirectionX * worldPosition.xxxx; + distance += uniforms.DirectionY * worldPosition.yyyy; + + // + // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); + distance = (distance * uniforms.Frequency) + uniforms.Phase; + + // // Now we need dist mod'd into range [-Pi..Pi] + // dist *= rcp(kTwoPi); + distance += uniforms.PiConsts.zzzz; + distance *= 1.0f / uniforms.PiConsts.wwww; + + // dist = frac(dist); + distance = fract(distance); + // dist *= kTwoPi; + distance *= uniforms.PiConsts.wwww; + // dist += -kPi; + distance -= uniforms.PiConsts.zzzz; + + // + // sincos(dist, sinDist, cosDist); + // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z + // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z + + float4 pow2 = distance * distance; // r0^2 + float4 pow3 = pow2 * distance; // r0^3 - probably stall + float4 pow4 = pow2 * pow2; // r0^4 + float4 pow5 = pow2 * pow3; // r0^5 + float4 pow7 = pow2 * pow5; // r0^7 + + //r1 + float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; + //r2 + float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; + + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; + sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; + + // Calc our depth based filtering here into r4 (because we don't use it again + // after here, and we need our filtering shortly). + float4 depth = uniforms.WaterLevel - worldPosition.zzzz; + depth *= uniforms.DepthFalloff; + depth += uniforms.MinAtten; + // Clamp .xyz to range [0..1] + depth = clamp(depth, 0, 1); + + // Calc our filter (see above). 
+ float4 inColor = float4(in.color) / 255.0f; + float4 filter = inColor.wwww * uniforms.Lengths; + filter = max(filter, uniforms.NumericConsts.xxxx); + filter = min(filter, uniforms.NumericConsts.zzzz); + + //mov r2, r1; + // r2 == sinDist + // r1 == cosDist + // sinDist *= filter; + sinDist *= filter; + // sinDist *= kAmplitude.xyzw + sinDist *= uniforms.Amplitude; + // r5 is now T = sum(Ai * sin()) + // METAL NOTE: from here on, r5 is sinDist + // height = dp4(sinDist, kOne); + // accumPos.z += height; (but accumPos.z is currently 0). + float4 accumPos = float4(0); + accumPos.x = dot(sinDist, uniforms.NumericConsts.zzzz); + accumPos.y = accumPos.x * depth.z; + accumPos.z = accumPos.y + uniforms.WaterLevel.w; + worldPosition.z = max(worldPosition.z, accumPos.z); // CLAMP + // r8.x == wave height relative to 0 + // r8.y == dampened wave relative to 0 + // r8.z == dampened wave height in world space + // r6.z == wave height clamped to never go beneath ground level + // + // cosDist *= filter; + cosDist *= filter; + // Pos = (in.x + S, in.y + R, r6.z) + // S = sum(k Dir.x A cos()) + // R = sum(k Dir.y A cos()) + // c30 = k Dir.x A + // c31 = k Dir.y A + // S = sum(cosDist * c30); + worldPosition.xy += float2( + dot(cosDist, uniforms.QADirX), + dot(cosDist, uniforms.QADirY) + ); + + // Bias our vert up a bit to compensate for precision errors. + // In particular, our filter coefficients are coming in as + // interpolated bytes, so there's bound to be a lot of slop + // from that. We've got a free slot in c25.x, so we'll use that. + // A better implementation would be to bias and scale our screen + // vert, effectively pushing the vert toward the camera without + // actually moving it, but this is easier and might work just + // as well. 
+ worldPosition.z += uniforms.RampBias.z; + + // + // // Transform position to screen + // + // + out.position = worldPosition * uniforms.WorldToNDC; + out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; + + // Now onto texture coordinate generation. + // + // First is the usual texture transform + out.texCoord0 = float4( + dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0), + dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1), + uniforms.NumericConsts.zz + ); + + // Dyna Stuff + // Constants + // c33 = fC1U, fC2U, fC1V, fC2V + // c34 = fInitAtten, t, life, 1.f / (life-decay) + // c35 = ramp, 1.f / ramp, BIAS (positive is up), FREE + // + // Vertex Info + // v7.z = fBirth (because we don't use it for anything else). + // + // Initialize r1.zw to 0,1 + + float4 r1 = float4(0,0,0,1); + // Calc r1.x = age, r1.y = atten + // age = t - birth. + r1.x = uniforms.LifeConsts.y - in.position.z; + // atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay)); + // first clamp0_1(age/ramp) + r1.y = r1.x - uniforms.RampBias.y; + r1.y = min(r1.y, 1.0f); + // now clamp0_1((life-age) / (life-decay)); + r1.z = uniforms.LifeConsts.z - in.position.x; + r1.z *= uniforms.LifeConsts.w; + r1.z = clamp(r1.z, 0.0f, 1.0f); + r1.y *= r1.z; + + // color is (atten, atten, atten, 1.f) + // Need to calculate opacity we would have had from vs_WaveFixedFin7.inl + // Right now that's just modulating by r4.y. 
+ + out.c1 = (depth * uniforms.LifeConsts.x) * r1.yyyw; + + // UVW = (inUVW - 0.5) * scale + 0.5 + // where: + // scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f + float4 r2 = float4(0,0,0,1); + r2.xy = r1.xx * uniforms.TexConsts.yw; + r2.xy += 1.0f; + r2.xy = (1.0f/r2.xy); + r2.xy *= uniforms.TexConsts.xz; + r1.xy = in.position.xy - 0.5f; + r1.xy *= r2.xy; + r1.xy += 0.5f; + out.texCoord0 = r1; + + return out; +} + +fragment half4 ps_WaveRip(waveRipInOut in [[stage_in]], + texture2d texture [[ texture(0) ]]) { + constexpr sampler colorSampler = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::repeat); + half4 t0 = texture.sample(colorSampler, in.texCoord0.xy); + + return t0 * half4(in.c1); +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 9e11efb97c..eaa866e299 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -417,6 +417,13 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL: (NS::Error **)nullptr ); break; + case plShaderID::vs_WaveRip7: + vertFunction = library->newFunction( + NS::String::string("vs_WaveRip7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; default: hsAssert(0, "unknown shader requested"); } @@ -469,6 +476,13 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MT (NS::Error **)nullptr ); break; + case plShaderID::ps_WaveRip: + fragFunction = library->newFunction( + NS::String::string("ps_WaveRip", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error **)nullptr + ); + break; default: hsAssert(0, "unknown shader requested"); } From 4e862c13fd5283e590a9a1f00e80ad5f9c0af464 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 30 Mar 2022 21:43:40 -0700 Subject: [PATCH 
030/165] Fixing issue in WaveSet7 sqrt in HLSL 1.1 implies an absolute value taken of the value, according to the spec. This fixes an issue where water might not render from some angles. This bug could also be present in other shaders; I have not checked yet. --- .../Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index 2190140299..a6ddea4b1d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -349,7 +349,8 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], float3 D = r5.xyz; float3 F = uniforms.EnvAdjust.xyz; float G = uniforms.EnvAdjust.w; - float3 t = dot(D.xyz, F.xyz) + sqrt(pow(dot(D.xyz, F.xyz), 2) - G);// r10.z = D dot F + SQRT((D dot F)^2 - G) + //METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt + float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(dot(D.xyz, F.xyz), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) } From 831cbc9f0778b17a4e0123a42e6483addd668227 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 30 Mar 2022 23:14:28 -0700 Subject: [PATCH 031/165] Adding wave rip cmake --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index e165024bef..fa91461465 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -128,6 +128,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal +
../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) From 79a26f8c195e93e008e229b7f8733844d10b293d Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 31 Mar 2022 11:32:08 -0700 Subject: [PATCH 032/165] Adding hardware single weight skinning Some aspects of this are temporary until multiple weight blending is introduced into the GPU pipeline. Integrated into the vertex shader. Not sure if this should be a compute shader eventually. --- .../ShaderSrc/FixedPipelineShaders.metal | 8 +++++++- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 9 ++++++--- .../pfMetalPipeline/ShaderSrc/ShaderVertex.h | 5 +++++ .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 13 +++---------- .../pfMetalPipeline/plMetalPipelineState.cpp | 9 +++++++++ 5 files changed, 30 insertions(+), 14 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 401f613bd2..f8dfa350d7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -247,6 +247,7 @@ constant constexpr sampler colorSamplers[] = { vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + constant float4x4 & blendMatrix1 [[ buffer(BufferIndexBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]], uint v_id [[vertex_id]]) { ColorInOut out; @@ -305,7 +306,12 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); - const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + float4 position = (uniforms.localToWorldMatrix *
float4(in.position, 1.0)); + if(temp_hasOnlyWeight1) { + const float4 position2 = blendMatrix1 * float4(in.position, 1.0); + position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); + } + const float4 vCamPosition = uniforms.worldToCameraMatrix * position; //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); //Fog diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 8a9a64fbf0..0c1c5caff6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -59,7 +59,8 @@ enum plMetalShaderArgumentIndex BufferIndexState = 2, BufferIndexUniforms = 3, BufferIndexFragArgBuffer = 5, - BufferIndexShadowCastFragArgBuffer = 4 + BufferIndexShadowCastFragArgBuffer = 4, + BufferIndexBlendMatrix1 = 6 }; enum plMetalVertexShaderUniform @@ -68,7 +69,8 @@ enum plMetalVertexShaderUniform VertexAttributeTexcoord = 1, VertexAttributeNormal = 9, VertexAttributeUVCount = 10, - VertexAttributeColor = 11 + VertexAttributeColor = 11, + VertexAttributeWeights = 12, }; enum plMetalFragmentShaderUniform @@ -85,7 +87,8 @@ enum plMetalFunctionConstant FunctionConstantNumLayers = 1, FunctionConstantSources = 2, FunctionConstantBlendModes = 10, - FunctionConstantLayerFlags = 18 + FunctionConstantLayerFlags = 18, + FunctionConstantNumWeights = 26, }; enum plMetalLayerPassType: uint8_t diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h index 1e6f879734..c6b94e1964 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderVertex.h @@ -61,10 +61,15 @@ constant bool hasLayer6 = num_layers > 5; constant bool hasLayer7 = num_layers > 6; constant bool hasLayer8 = 
num_layers > 7; +constant uint8_t num_weights [[ function_constant(FunctionConstantNumWeights) ]]; +constant bool hasWeight1 = num_weights > 0; +constant bool temp_hasOnlyWeight1 = num_weights == 1; + typedef struct { float3 position [[attribute(VertexAttributePosition)]]; float3 normal [[attribute(VertexAttributeNormal)]]; + float weight1 [[attribute(VertexAttributeWeights), function_constant(hasWeight1)]]; float3 texCoord1 [[attribute(VertexAttributeTexcoord), function_constant(hasTexture1)]]; float3 texCoord2 [[attribute(VertexAttributeTexcoord+1), function_constant(hasTexture2)]]; float3 texCoord3 [[attribute(VertexAttributeTexcoord+2), function_constant(hasTexture3)]]; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index d7a6c204e4..a8290d1798 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -940,19 +940,12 @@ void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& fView.fLocalToWorldLeftHanded = lastL2W.GetParity(); } -#if 0 // Skinning if( span.fNumMatrices == 2 ) { - D3DXMATRIX mat; - IMatrix44ToD3DMatrix(mat, drawable->GetPaletteMatrix(span.fBaseMatrix+1)); - fD3DDevice->SetTransform(D3DTS_WORLDMATRIX(1), &mat); - fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_1WEIGHTS); + matrix_float4x4 mat; + hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix+1), &mat); + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), BufferIndexBlendMatrix1); } - else - { - fD3DDevice->SetRenderState(D3DRS_VERTEXBLEND, D3DVBF_DISABLE); - } -#endif fCurrentRenderPassUniforms->projectionMatrix = fDevice.fMatrixProj; fCurrentRenderPassUniforms->worldToCameraMatrix = fDevice.fMatrixW2C; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index eaa866e299..7aacd7cc4e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -63,6 +63,7 @@ void plMetalPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* con { ushort numUVs = fNumUVs; constants->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); + constants->setConstantValue(&fNumWeights, MTL::DataTypeUChar, FunctionConstantNumWeights); } size_t plMetalPipelineState::GetHash() const { @@ -123,6 +124,14 @@ void plMetalPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vert vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); + if(this->fNumWeights > 0) { + int weightOneOffset = skinWeightOffset; + + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setFormat(MTL::VertexFormatFloat); + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setOffset(weightOneOffset); + } + for(int i=0; ifNumUVs; i++) { vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setFormat(MTL::VertexFormatFloat3); vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setBufferIndex(0); From 59133aa301e6932824d34403b19f548ae8a28a92 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 31 Mar 2022 20:03:24 -0700 Subject: [PATCH 033/165] Fixing fog color and blend issues --- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 2 +- .../FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal 
index f8dfa350d7..5baf2bca61 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -519,7 +519,7 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], currentColor = half4(in.vtxColor.rgb, 1.0) * currentColor; } - currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb, 1.0f - clamp((float)in.fogColor.a, 0.0f, 1.0f)); + currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb, (1.0f - clamp((float)in.fogColor.a, 0.0f, 1.0f)) * (float)currentColor.a); if (currentColor.a < fragmentShaderArgs.bufferedUniforms->alphaThreshold) { discard_fragment(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 7aacd7cc4e..f8182bf3f8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -207,19 +207,16 @@ void plMetalPipelineState::ConfigureBlendMode(const uint32_t blendMode, MTL::Ren // Add final color to FB. case hsGMatState::kBlendAdd: //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); + descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); break; // Multiply final color by FB color and add it into the FB. 
case hsGMatState::kBlendMADD: //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); - descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorDestinationColor); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); break; // Final color times final alpha, added into the FB. From 38ff4c6eedec5b0d8a92a792bb95819e5f263604 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 31 Mar 2022 20:48:01 -0700 Subject: [PATCH 034/165] Adding avatar tint colors --- .../FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal | 6 ++++-- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal index 440159b02b..49e7c62b48 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal @@ -47,6 +47,7 @@ using namespace metal; typedef struct { float4 position [[position]]; float2 uvPosition; + half4 color; } PreprocessAvatarTexturesInOut; typedef struct @@ -60,14 +61,15 @@ vertex PreprocessAvatarTexturesInOut PreprocessAvatarVertexShader(PreprocessAvat } fragment half4 PreprocessAvatarFragmentShader(PreprocessAvatarTexturesInOut in [[stage_in]], - texture2d layer [[ texture(0) ]]) + texture2d layer [[ texture(0) ]], + constant float4& blendColor [[ buffer(0 )]]) { constexpr sampler colorSampler(mip_filter::linear, mag_filter::linear, min_filter::linear, address::clamp_to_zero); - half4 colorSample = layer.sample(colorSampler, in.uvPosition.xy); + half4 colorSample = layer.sample(colorSampler, in.uvPosition.xy) * half4(blendColor); return colorSample; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index a8290d1798..cef37d8a7d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2671,6 +2671,7 @@ void plMetalPipeline::IPreprocessAvatarTextures() fCurrentPipelineState = avatarRenderState; } } + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&tint, sizeof(hsColorRGBA), 0); float screenW = (float)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f; float screenH = (float)item->fElements[j]->fHeight / layout->fOrigWidth * 2.f; From 6cb40d064b0b88e39659e7dd0356b72a3952d49e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 3 Apr 2022 23:02:56 -0700 Subject: [PATCH 035/165] Fixing alpha threshold MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When I first wrote this I picked just a really small number. Now I understand it should be 1.f/255.f to represent the smallest test DirectX can do. Some areas - specifically the Barons Office - are very sensitive to this value. The view outside the windows has some layers where the alpha values should be eliminated - but they hover just barely above zero. A test too small won’t catch them. I’m not sure of the purpose of these layers - they don’t seem to add anything visually - but they are there in DirectX too with the same lack of visual contribution.
--- .../FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index a58d1f0cfc..daf3da9d11 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -563,7 +563,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme if (state.fBlendFlags & hsGMatState::kBlendAlphaTestHigh) { uniforms->alphaThreshold = 64.f/255.f; } else { - uniforms->alphaThreshold = 0.0001f; + uniforms->alphaThreshold = 1.f/255.f; } } else { uniforms->alphaThreshold = 0.f; From 496f415853b07711b149c96b71c7b021e67dd88e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 29 Jul 2023 17:20:29 -0700 Subject: [PATCH 036/165] Fixing memory pile up during intro movie The intro movie keeps its own run loop which prevents our autorelease pool from collecting memory until the intro movie is done. This leads to a pile up of gigs (or tens of gigs) of memory as no framebuffer can be released. Apple was nice enough to add NSAutoreleasePool to their list of bridged C++ classes though, so we can insert our own autorelease pool to capture that memory right into the pipeline. 
--- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 3 +++ Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h | 2 ++ 2 files changed, 5 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index cef37d8a7d..23c7d94037 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -568,6 +568,7 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) bool plMetalPipeline::BeginRender() { + fCurrentPool = NS::AutoreleasePool::alloc()->init(); // offset transform RefreshScreenMatrices(); @@ -592,6 +593,7 @@ bool plMetalPipeline::BeginRender() CA::MetalDrawable *drawable = currentDrawableCallback(); if(!drawable) { + fCurrentPool->release(); return false; } fDevice.CreateNewCommandBuffer(drawable); @@ -631,6 +633,7 @@ bool plMetalPipeline::EndRender() fLayerRef[i] = nullptr; } } + fCurrentPool->release(); return retVal; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index fc411101f6..3fab942191 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -250,6 +250,8 @@ class plMetalPipeline : public pl3DPipeline std::vector fLightSourceStack; static plMetalEnumerate enumerator; + + NS::AutoreleasePool* fCurrentPool; }; #endif // _plGLPipeline_inc_ From d1ba4129ae600fd1888f08a6b42ff7ce39aeeb9f Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 8 Apr 2022 20:53:03 -0700 Subject: [PATCH 037/165] Improving state tracking MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit State tracking is meant to reduce the number of redundant calls to the Metal command encoder. The DX renderer had something similar, but implemented differently. 
This is the beginning of improving state tracking. I’d like to move materials over to bindless rendering, which should also improve things. There are also more general redundancies to cut down. --- .../pfMetalPipeline/plMetalPipeline.cpp | 52 +++++++++++++------ .../pfMetalPipeline/plMetalPipeline.h | 3 ++ 2 files changed, 39 insertions(+), 16 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 23c7d94037..17689d3238 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -317,7 +317,7 @@ void plMetalPipeline::PushRenderRequest(plRenderRequest *req) fView.SetMaxCullNodes(0); } - fView.fCullTreeDirty = true; + ResetMetalStateTracking(); } void plMetalPipeline::PopRenderRequest(plRenderRequest *req) @@ -330,10 +330,7 @@ void plMetalPipeline::PopRenderRequest(plRenderRequest *req) //it won't be set yet on the new target //in theory we could have a stack of these so when we unwind we //could get the state back. - fCurrentPipelineState = nullptr; - fCurrentDepthStencilState = nullptr; - - fCurrentPipelineState = nullptr; + ResetMetalStateTracking(); hsRefCnt_SafeUnRef(fView.fRenderRequest); fView = fViewStack.top(); @@ -343,6 +340,11 @@ void plMetalPipeline::PopRenderRequest(plRenderRequest *req) fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera; } +plRenderTarget* plMetalPipeline::PopRenderTarget() { + pl3DPipeline::PopRenderTarget(); + ResetMetalStateTracking(); +} + void plMetalPipeline::ClearRenderTarget(plDrawable *d) { plDrawableSpans* src = plDrawableSpans::ConvertNoRef(d); @@ -362,7 +364,7 @@ void plMetalPipeline::ClearRenderTarget(const hsColorRGBA *col, const float *dep hsColorRGBA clearColor = col ? *col : GetClearColor(); float clearDepth = depth ? 
*depth : fView.GetClearDepth(); fDevice.Clear(fView.fRenderState & kRenderClearColor, {clearColor.r, clearColor.g, clearColor.b, clearColor.a}, fView.fRenderState & kRenderClearDepth, 1.0); - fCurrentDepthStencilState = nullptr; + ResetMetalStateTracking(); } } @@ -572,7 +574,7 @@ bool plMetalPipeline::BeginRender() // offset transform RefreshScreenMatrices(); - fCurrentPipelineState = nullptr; + ResetMetalStateTracking(); // offset transform RefreshScreenMatrices(); @@ -613,11 +615,10 @@ bool plMetalPipeline::BeginRender() bool plMetalPipeline::EndRender() { bool retVal = false; + ResetMetalStateTracking(); if (--fInSceneDepth == 0) { fDevice.SubmitCommandBuffer(); - fCurrentPipelineState = nullptr; - fCurrentDepthStencilState = nullptr; IClearShadowSlaves(); } @@ -1007,7 +1008,10 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, if(!vRef->GetBuffer()) { return; } - fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + if (fCurrentVertexBuffer != vRef->GetBuffer()) { + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fCurrentVertexBuffer = vRef->GetBuffer(); + } fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); IPushPiggyBacks(material); @@ -1066,6 +1070,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, //if we had to render aux spans, we probably changed the vertex and index buffer //reset those fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); IRenderShadowsOntoSpan(render, &span, material, vRef); @@ -1218,6 +1223,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) return; } fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); plRenderTriListFunc render(&fDevice, 
0, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength); @@ -1458,10 +1464,10 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); const MTL::RenderPipelineState *pipelineState = pipeline->pipelineState;*/ - //if(fCurrentPipelineState != pipelineState) { + if(fCurrentPipelineState != pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); fCurrentPipelineState = pipelineState; - //} + } } return true; @@ -2336,7 +2342,10 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); + if(fCurrentPipelineState != pm->fPlateRenderPipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); + fCurrentPipelineState = pm->fPlateRenderPipelineState; + } float alpha = material->GetLayer(0)->GetOpacity(); fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&alpha, sizeof(float), 6); fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState); @@ -3632,8 +3641,12 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp /// Switch to the vertex buffer we want plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowCasterPipelineState(&fDevice, vRef).GetRenderPipelineState(); - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + if(fCurrentPipelineState != linkedPipeline->pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + fCurrentPipelineState = linkedPipeline->pipelineState; + } 
fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); @@ -3703,10 +3716,10 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con } plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); - //if(fCurrentPipelineState != linkedPipeline->pipelineState) { + if(fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fCurrentPipelineState = linkedPipeline->pipelineState; - //} + } int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex); @@ -4372,3 +4385,10 @@ uint32_t plMetalPipeline::IGetBufferFormatSize( uint8_t format ) const return size; } + +void plMetalPipeline::ResetMetalStateTracking() +{ + fCurrentPipelineState = nullptr; + fCurrentDepthStencilState = nullptr; + fCurrentVertexBuffer = nullptr; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 3fab942191..adcf0681a5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -164,6 +164,7 @@ class plMetalPipeline : public pl3DPipeline //cache to prevent oversetting, Metal won't catch this for us and will encode extra work const MTL::RenderPipelineState* fCurrentPipelineState; + MTL::Buffer* fCurrentVertexBuffer; MTL::DepthStencilState* fCurrentDepthStencilState; void FindFragFunction(); @@ -233,6 +234,8 @@ class plMetalPipeline : public pl3DPipeline void IReleaseRenderTargetPools(); void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); + 
void ResetMetalStateTracking(); + // Shadows std::vector fRenderTargetPool512; std::vector fRenderTargetPool256; From 8a6f194dcbceb23f5b868f246a0968e9a7496062 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 9 Apr 2022 22:57:27 -0700 Subject: [PATCH 038/165] Adding texture anisotropy support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change has caused a performance decline. It’s likely related to the samplers being bound now instead of being hardcoded. Still investigating. --- .../ShaderSrc/FixedPipelineShaders.metal | 25 +++++---- .../pfMetalPipeline/plMetalDevice.cpp | 53 +++++++++++++++++++ .../pfMetalPipeline/plMetalDevice.h | 4 ++ .../pfMetalPipeline/plMetalPipeline.cpp | 24 +++++++-- 4 files changed, 91 insertions(+), 15 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 5baf2bca61..a0203bf0a1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -195,6 +195,7 @@ typedef struct { texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; const constant half4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; + array samplers [[ sampler(0) ]]; } FragmentShaderArguments; float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); @@ -227,21 +228,25 @@ constant constexpr sampler colorSamplers[] = { sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, - address::repeat), + 
address::repeat, + max_anisotropy(16)), sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, s_address::clamp_to_edge, - t_address::repeat), + t_address::repeat, + max_anisotropy(16)), sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, s_address::repeat, - t_address::clamp_to_edge), + t_address::clamp_to_edge, + max_anisotropy(16)), sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, - address::clamp_to_edge), + address::clamp_to_edge, + max_anisotropy(16)), }; @@ -463,7 +468,7 @@ float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const ui return sampleCoord.xyz; } -half4 sampleLayer(uint8_t passType, uint8_t sampleType, uint32_t miscFlags, float3 sampleCoord, const thread half4 &color, const thread texture2d &texture, const thread texturecube &cubicTexture) { +half4 sampleLayer(uint8_t passType, uint32_t miscFlags, float3 sampleCoord, const thread half4 &color, const thread texture2d &texture, const thread texturecube &cubicTexture, const thread sampler& colorSampler) { if(passType == PassTypeColor) { return color; @@ -475,9 +480,9 @@ half4 sampleLayer(uint8_t passType, uint8_t sampleType, uint32_t miscFlags, floa //do the actual sample if(passType == PassTypeTexture) { - return texture.sample(colorSamplers[sampleType], sampleCoord.xy); + return texture.sample(colorSampler, sampleCoord.xy); } else if(passType == PassTypeCubicTexture) { - return cubicTexture.sample(colorSamplers[sampleType], sampleCoord.xyz); + return cubicTexture.sample(colorSampler, sampleCoord.xyz); } else { return half4(0); } @@ -507,7 +512,7 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], float3 sampleCoord = (&in.texCoord1)[layer]; - color = sampleLayer(sourceTypes[layer], fragmentShaderArgs.bufferedUniforms->layers[layer].sampleType, miscFlags[layer], sampleCoord, half4(in.vtxColor), (&fragmentShaderArgs.textures)[layer], (&fragmentShaderArgs.cubicTextures)[layer]); + color = 
sampleLayer(sourceTypes[layer], miscFlags[layer], sampleCoord, half4(in.vtxColor), (&fragmentShaderArgs.textures)[layer], (&fragmentShaderArgs.cubicTextures)[layer], fragmentShaderArgs.samplers[fragmentShaderArgs.bufferedUniforms->layers[layer].sampleType]); if(layer==0) { blendFirst(color, currentColor, blendModes[layer]); @@ -685,13 +690,13 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], //only possible alpha sources are layers 0 or 1 if(alphaSrc == 0) { - half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[0].sampleType, miscFlags[1], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0]); + half4 layerColor = sampleLayer(sourceTypes[2], miscFlags[1], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0], layers.samplers[sourceTypes[layers.bufferedUniforms->layers[0].sampleType]]); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; } else if(alphaSrc == 1) { - half4 layerColor = sampleLayer(sourceTypes[2], layers.bufferedUniforms->layers[1].sampleType, miscFlags[1], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], (&layers.cubicTextures)[1]); + half4 layerColor = sampleLayer(sourceTypes[2], miscFlags[1], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], (&layers.cubicTextures)[1], layers.samplers[sourceTypes[layers.bufferedUniforms->layers[1].sampleType]]); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 2b656c80ed..ecb36e17d8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -110,6 +110,57 @@ void plMetalDevice::Shutdown() hsAssert(0, "Shutdown not implemented for Metal rendering"); } + +void plMetalDevice::SetMaxAnsiotropy(int8_t maxAnsiotropy) +{ + 
//setup the material pass samplers + //load them all at once and then let the shader pick + + if (maxAnsiotropy == 0) + maxAnsiotropy = 1; + + if(fSamplerStates[0] != nullptr) { + ReleaseSamplerStates(); + } + + MTL::SamplerDescriptor *samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + samplerDescriptor->setMaxAnisotropy(maxAnsiotropy); + samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); + samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear); + + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeRepeat); + samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeRepeat); + fSamplerStates[0] = fMetalDevice->newSamplerState(samplerDescriptor); + + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge); + samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeRepeat); + fSamplerStates[1] = fMetalDevice->newSamplerState(samplerDescriptor); + + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeRepeat); + samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge); + fSamplerStates[2] = fMetalDevice->newSamplerState(samplerDescriptor); + + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge); + samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge); + fSamplerStates[3] = fMetalDevice->newSamplerState(samplerDescriptor); +} + +void plMetalDevice::ReleaseSamplerStates() +{ + fSamplerStates[0]->release(); + fSamplerStates[0] = nullptr; + + fSamplerStates[1]->release(); + fSamplerStates[1] = nullptr; + + fSamplerStates[2]->release(); + fSamplerStates[2] = nullptr; + + fSamplerStates[3]->release(); + fSamplerStates[3] = nullptr; +} + void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth) { //Plasma may clear a target and draw at different times. 
@@ -196,6 +247,7 @@ void plMetalDevice::BeginNewRenderPass() { fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } + fCurrentRenderTargetCommandEncoder->setFragmentSamplerStates(fSamplerStates, NS::Range::Make(0, 4)); } void plMetalDevice::SetRenderTarget(plRenderTarget* target) @@ -257,6 +309,7 @@ plMetalDevice::plMetalDevice() { fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; + fSamplerStates[0] = nullptr; fMetalDevice = MTL::CreateSystemDefaultDevice(); fCommandQueue = fMetalDevice->newCommandQueue(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index d31e7e6ceb..6a4aca8816 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -168,6 +168,8 @@ class plMetalDevice ///Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. 
void SubmitCommandBuffer(); void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); + + void SetMaxAnsiotropy(int8_t maxAnsiotropy); private: struct plMetalPipelineRecord { @@ -226,8 +228,10 @@ class plMetalDevice float fClearRenderTargetDepth; float fClearDrawableDepth; plRenderTarget* fCurrentRenderTarget; + MTL::SamplerState* fSamplerStates[4]; void BeginNewRenderPass(); + void ReleaseSamplerStates(); }; #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 17689d3238..d50fb7b663 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -196,7 +196,7 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons if (fMaxLayersAtOnce < 4) SetDebugFlag(plPipeDbg::kFlagBumpUV, true); //plDynamicCamMap::SetCapable(false); - plQuality::SetQuality(fDefaultPipeParams.VideoQuality); + //plQuality::SetQuality(fDefaultPipeParams.VideoQuality); //plQuality::SetCapability(fDefaultPipeParams.VideoQuality); plQuality::SetCapability(plQuality::kPS_3); //plShadowCaster::EnableShadowCast(false); @@ -209,6 +209,8 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons // RenderTarget pools are shared for our shadow generation algorithm. // Different sizes for different resolutions. IMakeRenderTargetPools(); + + fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); } plMetalPipeline::~plMetalPipeline() @@ -811,20 +813,32 @@ void plMetalPipeline::GetSupportedDisplayModes(std::vector *res, int plMetalPipeline::GetMaxAnisotropicSamples() { - //FIXME: Fix antialiasing - return 0; + //Metal always supports 16. There is no device check (as far as I know.) 
+ return 16; } int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth) { - //FIXME: Fix antialiasing - return 0; + //Metal devices may not support the full antialias range + //return the max and we'll work it out later + if (fDevice.fMetalDevice->supportsTextureSampleCount(8)) { + return 8; + } + if (fDevice.fMetalDevice->supportsTextureSampleCount(4)) { + return 4; + } + if (fDevice.fMetalDevice->supportsTextureSampleCount(2)) { + return 2; + } + return 1; } void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync) { //FIXME: Whats this? //Seems like an entry point for passing in display settings. + + fDevice.SetMaxAnsiotropy(MaxAnisotropicSamples); } void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vector &visList) From 236c278f97fd38e5197028fce7b23741d2119d7b Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 9 Apr 2022 23:00:19 -0700 Subject: [PATCH 039/165] =?UTF-8?q?Adding=20missing=20=E2=80=9CpopRenderTa?= =?UTF-8?q?rget=E2=80=9D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index adcf0681a5..9f4bb7f314 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -158,6 +158,8 @@ class plMetalPipeline : public pl3DPipeline void ISetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalVertexBufferRef* vRef); uint32_t IGetBufferFormatSize( uint8_t format ) const; + + plRenderTarget* PopRenderTarget() override; private: MTL::RenderPipelineState* fPipelineState; VertexUniforms* fCurrentRenderPassUniforms; From 78b314581dc022f6efc50cdcea9a73bfc48e07ee Mon Sep 17 
00:00:00 2001 From: Colin Cornaby Date: Sun, 10 Apr 2022 16:03:01 -0700 Subject: [PATCH 040/165] Fixing performance with anisotropic texturing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Metal didn’t like the dynamic array lookup I was doing. Converting it to static lookups. This has restored expected performance. Also converted the sample type to a function constant. There is cleanup to do now. The fragment uniforms have been reduced to the alpha threshold, which is now awkwardly in a struct by itself. None of the layer data lives there anymore. In the pipeline, the encoding for shader lookup has gotten real messy. I’m probably going to look at adding MSAA first, but the cleanup is really overdue. I’ve had to work through multiple issues already that happened because I tried to do minor tweaks to that code but forgot to touch a specific part. --- .../ShaderSrc/FixedPipelineShaders.metal | 107 +++++++++++------- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 7 +- .../plMetalMaterialShaderRef.cpp | 59 +++++----- .../plMetalMaterialShaderRef.h | 2 +- .../pfMetalPipeline/plMetalPipeline.cpp | 20 ++++ .../pfMetalPipeline/plMetalPipelineState.cpp | 16 +++ .../pfMetalPipeline/plMetalPipelineState.h | 7 +- 7 files changed, 140 insertions(+), 78 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index a0203bf0a1..bd26077343 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -143,38 +143,48 @@ enum plUVWSrcModifiers: uint32_t { using namespace metal; -constant uint8_t sourceType1 [[ function_constant(FunctionConstantSources + 0) ]]; -constant uint8_t sourceType2 [[ function_constant(FunctionConstantSources + 1) ]]; -constant uint8_t sourceType3 [[
function_constant(FunctionConstantSources + 2) ]]; -constant uint8_t sourceType4 [[ function_constant(FunctionConstantSources + 3) ]]; -constant uint8_t sourceType5 [[ function_constant(FunctionConstantSources + 4) ]]; -constant uint8_t sourceType6 [[ function_constant(FunctionConstantSources + 5) ]]; -constant uint8_t sourceType7 [[ function_constant(FunctionConstantSources + 6) ]]; -constant uint8_t sourceType8 [[ function_constant(FunctionConstantSources + 7) ]]; - -constant uint32_t blendModes1 [[ function_constant(FunctionConstantBlendModes + 0) ]]; -constant uint32_t blendModes2 [[ function_constant(FunctionConstantBlendModes + 1) ]]; -constant uint32_t blendModes3 [[ function_constant(FunctionConstantBlendModes + 2) ]]; -constant uint32_t blendModes4 [[ function_constant(FunctionConstantBlendModes + 3) ]]; -constant uint32_t blendModes5 [[ function_constant(FunctionConstantBlendModes + 4) ]]; -constant uint32_t blendModes6 [[ function_constant(FunctionConstantBlendModes + 5) ]]; -constant uint32_t blendModes7 [[ function_constant(FunctionConstantBlendModes + 6) ]]; -constant uint32_t blendModes8 [[ function_constant(FunctionConstantBlendModes + 7) ]]; - -constant uint32_t miscFlags1 [[ function_constant(FunctionConstantLayerFlags + 0) ]]; -constant uint32_t miscFlags2 [[ function_constant(FunctionConstantLayerFlags + 1) ]]; -constant uint32_t miscFlags3 [[ function_constant(FunctionConstantLayerFlags + 2) ]]; -constant uint32_t miscFlags4 [[ function_constant(FunctionConstantLayerFlags + 3) ]]; -constant uint32_t miscFlags5 [[ function_constant(FunctionConstantLayerFlags + 4) ]]; -constant uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFlags + 5) ]]; -constant uint32_t miscFlags7 [[ function_constant(FunctionConstantLayerFlags + 6) ]]; -constant uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; +constant const uint8_t sourceType1 [[ function_constant(FunctionConstantSources + 0) ]]; +constant const uint8_t 
sourceType2 [[ function_constant(FunctionConstantSources + 1) ]]; +constant const uint8_t sourceType3 [[ function_constant(FunctionConstantSources + 2) ]]; +constant const uint8_t sourceType4 [[ function_constant(FunctionConstantSources + 3) ]]; +constant const uint8_t sourceType5 [[ function_constant(FunctionConstantSources + 4) ]]; +constant const uint8_t sourceType6 [[ function_constant(FunctionConstantSources + 5) ]]; +constant const uint8_t sourceType7 [[ function_constant(FunctionConstantSources + 6) ]]; +constant const uint8_t sourceType8 [[ function_constant(FunctionConstantSources + 7) ]]; + +constant const uint32_t blendModes1 [[ function_constant(FunctionConstantBlendModes + 0) ]]; +constant const uint32_t blendModes2 [[ function_constant(FunctionConstantBlendModes + 1) ]]; +constant const uint32_t blendModes3 [[ function_constant(FunctionConstantBlendModes + 2) ]]; +constant const uint32_t blendModes4 [[ function_constant(FunctionConstantBlendModes + 3) ]]; +constant const uint32_t blendModes5 [[ function_constant(FunctionConstantBlendModes + 4) ]]; +constant const uint32_t blendModes6 [[ function_constant(FunctionConstantBlendModes + 5) ]]; +constant const uint32_t blendModes7 [[ function_constant(FunctionConstantBlendModes + 6) ]]; +constant const uint32_t blendModes8 [[ function_constant(FunctionConstantBlendModes + 7) ]]; + +constant const uint32_t miscFlags1 [[ function_constant(FunctionConstantLayerFlags + 0) ]]; +constant const uint32_t miscFlags2 [[ function_constant(FunctionConstantLayerFlags + 1) ]]; +constant const uint32_t miscFlags3 [[ function_constant(FunctionConstantLayerFlags + 2) ]]; +constant const uint32_t miscFlags4 [[ function_constant(FunctionConstantLayerFlags + 3) ]]; +constant const uint32_t miscFlags5 [[ function_constant(FunctionConstantLayerFlags + 4) ]]; +constant const uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFlags + 5) ]]; +constant const uint32_t miscFlags7 [[ 
function_constant(FunctionConstantLayerFlags + 6) ]]; +constant const uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; + +constant const size_t sampleType1 [[ function_constant(FunctionConstantSampleTypes + 0) ]]; +constant const size_t sampleType2 [[ function_constant(FunctionConstantSampleTypes + 1) ]]; +constant const size_t sampleType3 [[ function_constant(FunctionConstantSampleTypes + 2) ]]; +constant const size_t sampleType4 [[ function_constant(FunctionConstantSampleTypes + 3) ]]; +constant const size_t sampleType5 [[ function_constant(FunctionConstantSampleTypes + 4) ]]; +constant const size_t sampleType6 [[ function_constant(FunctionConstantSampleTypes + 5) ]]; +constant const size_t sampleType7 [[ function_constant(FunctionConstantSampleTypes + 6) ]]; +constant const size_t sampleType8 [[ function_constant(FunctionConstantSampleTypes + 7) ]]; #define MAX_BLEND_PASSES 8 constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; constant const uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; -constant uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); +constant const size_t sampleTypes[MAX_BLEND_PASSES] = { sampleType1, sampleType2, sampleType3, sampleType4, sampleType5, sampleType6, sampleType7, sampleType8}; +constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); typedef struct { texture2d textures [[ 
texture(FragmentShaderArgumentAttributeTextures), function_constant(hasLayer1) ]]; @@ -195,7 +205,10 @@ typedef struct { texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; const constant half4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; - array samplers [[ sampler(0) ]]; + sampler samplers [[ sampler(0) ]]; + sampler sampler2 [[ sampler(1) ]]; + sampler sampler3 [[ sampler(2) ]]; + sampler sampler4 [[ sampler(3) ]]; } FragmentShaderArguments; float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); @@ -356,8 +369,8 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], return out; } -void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags); -void blend(half4 srcSample, thread half4 &destSample, uint32_t blendFlags); +constexpr void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags); +constexpr void blend(half4 srcSample, thread half4 &destSample, uint32_t blendFlags); float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix) { //Note: If we want to require newer versions of Metal/newer hardware we could pass function pointers instead of doing these ifs. 
@@ -468,11 +481,27 @@ float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const ui return sampleCoord.xyz; } -half4 sampleLayer(uint8_t passType, uint32_t miscFlags, float3 sampleCoord, const thread half4 &color, const thread texture2d &texture, const thread texturecube &cubicTexture, const thread sampler& colorSampler) { +constexpr half4 sampleLayer(uint8_t passType, uint32_t miscFlags, float3 sampleCoord, const thread half4 &color, const thread texture2d &texture, const thread texturecube &cubicTexture, const uint8_t sampleType, const thread sampler* colorSamplers) { if(passType == PassTypeColor) { return color; } else { + /* + Not using array based lookup here because the compiler + seems to have an easier time unrolling this if each lookup is done + with a constant. Using an array based lookup was hurting performance by + about 1/3rd on Apple Silicon. + */ + sampler colorSampler; + if(sampleType == 0) { + colorSampler = colorSamplers[0]; + } else if(sampleType == 1) { + colorSampler = colorSamplers[1]; + } else if(sampleType == 2) { + colorSampler = colorSamplers[2]; + } else if(sampleType == 3) { + colorSampler = colorSamplers[3]; + } if (miscFlags & kMiscPerspProjection) { sampleCoord.xy /= sampleCoord.z; @@ -511,8 +540,10 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], for(size_t layer=0; layerlayers[layer].sampleType]); + color = sampleLayer(sourceTypes[layer], miscFlags[layer], sampleCoord, half4(in.vtxColor), (&fragmentShaderArgs.textures)[layer], (&fragmentShaderArgs.cubicTextures)[layer], sampleType, &fragmentShaderArgs.samplers); if(layer==0) { blendFirst(color, currentColor, blendModes[layer]); @@ -521,17 +552,17 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], } } - currentColor = half4(in.vtxColor.rgb, 1.0) * currentColor; + currentColor = half4(in.vtxColor.rgb, 1.0h) * currentColor; } - currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb, (1.0f - clamp((float)in.fogColor.a, 0.0f, 
1.0f)) * (float)currentColor.a); + currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb, (1.0h - clamp((float)in.fogColor.a, 0.0f, 1.0f)) * (float)currentColor.a); if (currentColor.a < fragmentShaderArgs.bufferedUniforms->alphaThreshold) { discard_fragment(); } return currentColor; } -void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { +constexpr void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { // Local variable to store the color value if (blendFlags & kBlendInvertColor) { @@ -554,7 +585,7 @@ void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendF } } -void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { +constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { // Local variable to store the color value if (blendFlags & kBlendInvertColor) { srcSample.rgb = 1.0h - srcSample.rgb; @@ -690,13 +721,13 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], //only possible alpha sources are layers 0 or 1 if(alphaSrc == 0) { - half4 layerColor = sampleLayer(sourceTypes[2], miscFlags[1], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0], layers.samplers[sourceTypes[layers.bufferedUniforms->layers[0].sampleType]]); + half4 layerColor = sampleLayer(sourceTypes[2], miscFlags[1], in.texCoord3, half4(layers.colors[0]), (&layers.textures)[0], (&layers.cubicTextures)[0], sampleTypes[0], &layers.samplers); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; } else if(alphaSrc == 1) { - half4 layerColor = sampleLayer(sourceTypes[2], miscFlags[1], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], (&layers.cubicTextures)[1], layers.samplers[sourceTypes[layers.bufferedUniforms->layers[1].sampleType]]); + half4 layerColor = sampleLayer(sourceTypes[2], miscFlags[1], in.texCoord3, half4(layers.colors[1]), (&layers.textures)[1], 
(&layers.cubicTextures)[1], sampleTypes[1], &layers.samplers); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 0c1c5caff6..f942fa63d8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -89,6 +89,7 @@ enum plMetalFunctionConstant FunctionConstantBlendModes = 10, FunctionConstantLayerFlags = 18, FunctionConstantNumWeights = 26, + FunctionConstantSampleTypes = 34, }; enum plMetalLayerPassType: uint8_t @@ -98,14 +99,8 @@ enum plMetalLayerPassType: uint8_t PassTypeColor = 3 }; -struct plFragmentShaderLayer { - uint8_t sampleType; -}; - struct plMetalFragmentShaderArgumentBuffer { - uint8_t layerCount; __fp16 alphaThreshold; - plFragmentShaderLayer layers[8]; }; struct plMetalShadowCastFragmentShaderArgumentBuffer { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index daf3da9d11..52c77bdf59 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -155,7 +155,6 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode simd_float4 colorMap[8]; plMetalFragmentShaderArgumentBuffer uniforms; - uniforms.layerCount = 0; IHandleMaterial(GetPassIndex(pass), &uniforms, piggyBacks, [&](plLayerInterface* layer, uint32_t index) { @@ -350,26 +349,6 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc } } -void plMetalMaterialShaderRef::PopulateFragmentShaderLayerFromLayer(plFragmentShaderLayer *fragmentLayer, plLayerInterface* layer) { - hsGMatState state = ICompositeLayerState(layer); - plBitmap* texture = layer->GetTexture(); - - switch 
(layer->GetClampFlags()) { - case hsGMatState::kClampTextureU: - fragmentLayer->sampleType = 1; - break; - case hsGMatState::kClampTextureV: - fragmentLayer->sampleType = 2; - break; - case hsGMatState::kClampTexture: - fragmentLayer->sampleType = 3; - break; - default: - fragmentLayer->sampleType = 0; - break; - } -} - uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) { uint32_t currNumLayers = 1; @@ -511,9 +490,6 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme //ISetBumpMatrices(currLay); } - PopulateFragmentShaderLayerFromLayer(&uniforms->layers[0], currLay); - - uniforms->layerCount++; postEncodeTransform(currLay, 0); @@ -527,10 +503,6 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme } preEncodeTransform(layPtr, i); - PopulateFragmentShaderLayerFromLayer(&uniforms->layers[i], layPtr); - - uniforms->layerCount++; - postEncodeTransform(layPtr, i); } @@ -544,10 +516,6 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme } preEncodeTransform(layPtr, i + currPiggyback); - PopulateFragmentShaderLayerFromLayer(&uniforms->layers[i + currPiggyback], layPtr); - - uniforms->layerCount++; - postEncodeTransform(layPtr, i + currPiggyback); } } @@ -627,3 +595,30 @@ void plMetalMaterialShaderRef::GetMiscFlagArray(uint32_t *array, uint8_t pass) { array[i] = layPtr->GetMiscFlags(); } } + +void plMetalMaterialShaderRef::GetSampleTypeArray(size_t *array, uint8_t pass) { + memset(array, 0, sizeof(uint8_t) * 8); + + uint16_t currNumLayers = fPassLengths[pass]; + uint16_t baseLayer = fPassIndices[pass]; + uint16_t i = 0; + for (i = 0; i < currNumLayers; i++) + { + plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); + + switch (layPtr->GetClampFlags()) { + case hsGMatState::kClampTextureU: + array[i] = 1; + break; + case hsGMatState::kClampTextureV: + array[i] = 2; + break; + case hsGMatState::kClampTexture: + array[i] = 3; + break; + default: + array[i] = 0; 
+ break; + } + } +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 3f31e13420..0eb7e765d4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -97,6 +97,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void GetSourceArray(uint8_t *array, uint8_t pass); void GetBlendFlagArray(uint32_t *array, uint8_t pass); void GetMiscFlagArray(uint32_t *array, uint8_t pass); + void GetSampleTypeArray(size_t *array, uint8_t pass); private: void ILoopOverLayers(); @@ -106,7 +107,6 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef uint32_t ILayersAtOnce(uint32_t which); void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap); - void PopulateFragmentShaderLayerFromLayer(plFragmentShaderLayer *fragmentLayer, plLayerInterface* layer); void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index d50fb7b663..62eea6d304 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1421,9 +1421,11 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons uint8_t sources[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; + size_t sampleTypes[8]; memset(sources, 0, sizeof(sources)); memset(blendModes, 0, sizeof(blendModes)); memset(miscFlags, 0, sizeof(miscFlags)); + memset(sampleTypes, 0, sizeof(sampleTypes)); lay = IPopOverBaseLayer(lay); @@ -1433,6 +1435,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons mRef->GetSourceArray(sources, pass); 
mRef->GetBlendFlagArray(blendModes, pass); mRef->GetMiscFlagArray(miscFlags, pass); + mRef->GetSampleTypeArray(sampleTypes, pass); } else { mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, @@ -1457,6 +1460,21 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons blendModes[index] = layer->GetBlendFlags(); miscFlags[index] = layer->GetMiscFlags(); + switch (layer->GetClampFlags()) { + case hsGMatState::kClampTextureU: + sampleTypes[index] = 1; + break; + case hsGMatState::kClampTextureV: + sampleTypes[index] = 2; + break; + case hsGMatState::kClampTexture: + sampleTypes[index] = 3; + break; + default: + sampleTypes[index] = 0; + break; + } + return layer; }, [&](plLayerInterface* layer, uint32_t index){ @@ -1471,6 +1489,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons memcpy(passDescription.passTypes, sources, sizeof(sources)); memcpy(passDescription.blendModes, blendModes, sizeof(blendModes)); memcpy(passDescription.miscFlags, miscFlags, sizeof(miscFlags)); + memcpy(passDescription.sampleTypes, sampleTypes, sizeof(sampleTypes)); passDescription.numLayers = numActivePiggyBacks + mRef->fPassLengths[pass]; plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); @@ -3723,6 +3742,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con memset(&passDescription.miscFlags, 0, sizeof(passDescription.miscFlags)); memset(&passDescription.blendModes, 0, sizeof(passDescription.blendModes)); memset(&passDescription.passTypes, 0, sizeof(passDescription.passTypes)); + memset(&passDescription.sampleTypes, 0, sizeof(passDescription.sampleTypes)); passDescription.Populate(mat->GetLayer(0), 2); passDescription.numLayers = 3; if (mat->GetNumLayers()>1) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index f8182bf3f8..2b4ecafd6e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -96,6 +96,7 @@ void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstan constants->setConstantValues(&fPassDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); constants->setConstantValues(&fPassDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); constants->setConstantValues(&fPassDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); + constants->setConstantValues(&fPassDescription.sampleTypes, MTL::DataTypeULong, NS::Range(FunctionConstantSampleTypes, 8)); } size_t plMetalMaterialPassPipelineState::GetHash() const { @@ -333,6 +334,21 @@ void plMetalMaterialPassDescription::Populate(plLayerInterface* layPtr, uint8_t passTypes[index] = PassTypeColor; } + + switch (layPtr->GetClampFlags()) { + case hsGMatState::kClampTextureU: + sampleTypes[index] = 1; + break; + case hsGMatState::kClampTextureV: + sampleTypes[index] = 2; + break; + case hsGMatState::kClampTexture: + sampleTypes[index] = 3; + break; + default: + sampleTypes[index] = 0; + break; + } } bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index df84dfb006..fc613ac6e1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -93,10 +93,11 @@ struct plMetalMaterialPassDescription { uint8_t passTypes[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; + size_t sampleTypes[8]; uint8_t numLayers; bool operator==(const plMetalMaterialPassDescription &p) const { - bool match = 
numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0; + bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0 && memcmp(sampleTypes, p.sampleTypes, sizeof(sampleTypes)) == 0; return match; } @@ -115,6 +116,10 @@ struct plMetalMaterialPassDescription { value ^= std::hash()( passTypes[i] ); } + for(int i=0;i<8;i++){ + value ^= std::hash()( sampleTypes[i] ); + } + return value; } From f182e96ea2cefdcf04a44b01b5a3c08794d9f8ac Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 10 Apr 2022 23:39:08 -0700 Subject: [PATCH 041/165] Initial MSAA support --- .../pfMetalPipeline/plMetalDevice.cpp | 92 +++++++++++++++++-- .../pfMetalPipeline/plMetalDevice.h | 30 +++--- .../pfMetalPipeline/plMetalPipeline.cpp | 5 +- .../pfMetalPipeline/plMetalPlateManager.cpp | 1 + 4 files changed, 102 insertions(+), 26 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index ecb36e17d8..cbd51942a3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -111,7 +111,7 @@ void plMetalDevice::Shutdown() } -void plMetalDevice::SetMaxAnsiotropy(int8_t maxAnsiotropy) +void plMetalDevice::SetMaxAnsiotropy(uint8_t maxAnsiotropy) { //setup the material pass samplers //load them all at once and then let the shader pick @@ -146,6 +146,31 @@ void plMetalDevice::SetMaxAnsiotropy(int8_t maxAnsiotropy) fSamplerStates[3] = fMetalDevice->newSamplerState(samplerDescriptor); } +void plMetalDevice::SetMSAASampleCount(uint8_t sampleCount) +{ + //Plasma has some MSAA levels that don't completely correspond to what Metal can do + 
//Best fit them to levels Metal can do. Once they are best fit see if the hardware + //is capable. + + uint8_t actualSampleCount = 1; + if (sampleCount == 6) { + actualSampleCount = 8; + } else if (sampleCount == 4) { + actualSampleCount = 4; + } else if (sampleCount == 2) { + actualSampleCount = 2; + } + + while (actualSampleCount != 1) { + if (fMetalDevice->supportsTextureSampleCount(actualSampleCount)) { + break; + } + actualSampleCount /= 2; + } + + fSampleCount = actualSampleCount; +} + void plMetalDevice::ReleaseSamplerStates() { fSamplerStates[0]->release(); @@ -213,7 +238,6 @@ void plMetalDevice::BeginNewRenderPass() { } MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); - renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); if (fCurrentRenderTarget) { @@ -231,6 +255,9 @@ void plMetalDevice::BeginNewRenderPass() { renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); } + + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); + fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } else { renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearDrawableColor.x, fClearDrawableColor.y, fClearDrawableColor.z, fClearDrawableColor.w)); @@ -240,10 +267,17 @@ void plMetalDevice::BeginNewRenderPass() { renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); } - renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); renderPassDescriptor->depthAttachment()->setClearDepth(fClearDrawableDepth); - renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); 
renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); + renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); + renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); + + if (fSampleCount == 1) { + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); + } else { + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); + } + fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } @@ -305,7 +339,8 @@ plMetalDevice::plMetalDevice() fCurrentCommandBuffer(nullptr), fCurrentOffscreenCommandBuffer(nullptr), fCurrentRenderTarget(nullptr), - fNewPipelineStateMap() + fNewPipelineStateMap(), + fCurrentFragmentMSAAOutputTexture(nullptr) { fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; @@ -822,20 +857,39 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) ) { if(fCurrentDrawableDepthTexture) { fCurrentDrawableDepthTexture->release(); + fCurrentFragmentMSAAOutputTexture->release(); } MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, drawable->texture()->width(), drawable->texture()->height(), false); - if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { + if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); - fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); + if (fSampleCount != 1) { + //MSSA depth and color output + depthTextureDescriptor->setSampleCount(fSampleCount); + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + 
depthTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); + fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); + + MTL::TextureDescriptor *msaaColorTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), + drawable->texture()->width(), + drawable->texture()->height(), + false); + msaaColorTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); + msaaColorTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + msaaColorTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); + msaaColorTextureDescriptor->setSampleCount(fSampleCount); + fCurrentFragmentMSAAOutputTexture = fMetalDevice->newTexture(msaaColorTextureDescriptor); + } else { + fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); + } } fCurrentDrawable = drawable->retain(); } @@ -873,6 +927,8 @@ void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condi descriptor->setDepthAttachmentPixelFormat(record.depthFormat); descriptor->colorAttachments()->object(0)->setPixelFormat(record.colorFormat); + descriptor->setSampleCount(record.sampleCount); + NS::Error* error; fMetalDevice->newRenderPipelineState(descriptor, ^(MTL::RenderPipelineState *pipelineState, NS::Error *error){ if (error) { @@ -903,7 +959,8 @@ plMetalDevice::plMetalLinkedPipeline* plMetalDevice::PipelineState(plMetalPipeli plMetalPipelineRecord record = { depthFormat, - colorFormat + colorFormat, + CurrentTargetSampleCount() }; record.state = std::shared_ptr(pipelineState->Clone()); @@ -948,7 +1005,8 @@ std::condition_variable* plMetalDevice::PrewarmPipelineStateFor(plMetalPipelineS plMetalPipelineRecord record = { depthFormat, - colorFormat + colorFormat, + CurrentTargetSampleCount() }; record.state = std::shared_ptr(pipelineState->Clone()); @@ -965,6 +1023,7 @@ std::condition_variable* plMetalDevice::PrewarmPipelineStateFor(plMetalPipelineS bool 
plMetalDevice::plMetalPipelineRecord::operator==(const plMetalPipelineRecord &p) const { return depthFormat == p.depthFormat && colorFormat == p.colorFormat && + sampleCount == p.sampleCount && state->operator==(*p.state); } @@ -979,6 +1038,20 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; + if (fSampleCount != 1) { + //MSAA is enabled, do the final multisampling resolve pass + + MTL::RenderPassDescriptor *resolvePassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + resolvePassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); + resolvePassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); + resolvePassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); + resolvePassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); + + MTL::RenderCommandEncoder * resolveCommand = fCurrentCommandBuffer->renderCommandEncoder(resolvePassDescriptor); + resolveCommand->setLabel(NS::MakeConstantString("Resolve Multisampling Pass")); + resolveCommand->endEncoding(); + } + fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->enqueue(); fCurrentCommandBuffer->commit(); @@ -1003,6 +1076,7 @@ std::size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMeta std::size_t value = std::hash()(s.depthFormat); value ^= std::hash()(s.colorFormat); value ^= std::hash()(*s.state); + value ^= std::hash()(s.sampleCount); return value; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 6a4aca8816..4d5a46b2f4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -159,6 +159,7 @@ class plMetalDevice MTL::DepthStencilState 
*fNoZReadOrWriteStencilState; MTL::DepthStencilState *fReverseZStencilState; MTL::DepthStencilState *fDefaultStencilState; + uint8_t fSampleCount; ///Create a new command buffer to encode all the operations needed to draw a frame //Currently requires a CA drawable and not a Metal drawable. In since CA drawable is only abstract implementation I know about, not sure where we would find others? @@ -169,12 +170,22 @@ class plMetalDevice void SubmitCommandBuffer(); void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); - void SetMaxAnsiotropy(int8_t maxAnsiotropy); + void SetMaxAnsiotropy(uint8_t maxAnsiotropy); + void SetMSAASampleCount(uint8_t sampleCount); + + NS::UInteger CurrentTargetSampleCount() { + if (fCurrentRenderTarget) { + return 1; + } else { + return fSampleCount; + } + } private: struct plMetalPipelineRecord { MTL::PixelFormat depthFormat; MTL::PixelFormat colorFormat; + NS::UInteger sampleCount; std::shared_ptr state; bool operator==(const plMetalPipelineRecord &p) const; @@ -193,20 +204,6 @@ class plMetalDevice void StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable **condOut); std::condition_variable* PrewarmPipelineStateFor(plMetalPipelineState* pipelineState); - struct plPipelineStateRecord { - MTL::PixelFormat outputFormat; - MTL::PixelFormat depthFormat; - plMetalPipelineState *state; - - bool operator==(const plPipelineStateRecord &p) const { - return (outputFormat == p.outputFormat && depthFormat == p.depthFormat && state == p.state); - } - - plPipelineStateRecord(const plPipelineStateRecord &attributes) { - memcpy(this, &attributes, sizeof(plPipelineStateRecord)); - } - }; - protected: plMetalLinkedPipeline* PipelineState(plMetalPipelineState* pipelineState); @@ -217,8 +214,11 @@ class plMetalDevice MTL::CommandBuffer* fCurrentCommandBuffer; MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; + MTL::Texture* 
fCurrentDrawableDepthTexture; MTL::Texture* fCurrentFragmentOutputTexture; + MTL::Texture* fCurrentFragmentMSAAOutputTexture; + CA::MetalDrawable* fCurrentDrawable; MTL::PixelFormat fCurrentDepthFormat; simd_float4 fClearRenderTargetColor; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 62eea6d304..3bc9bbdd18 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -201,6 +201,9 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons plQuality::SetCapability(plQuality::kPS_3); //plShadowCaster::EnableShadowCast(false); + fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); + fDevice.SetMSAASampleCount(fInitialPipeParams.AntiAliasingAmount); + fPlateMgr = new plMetalPlateManager(this); fCurrentRenderPassUniforms = (VertexUniforms *) calloc(sizeof(VertexUniforms), sizeof(char)); @@ -209,8 +212,6 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons // RenderTarget pools are shared for our shadow generation algorithm. // Different sizes for different resolutions. 
IMakeRenderTargetPools(); - - fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); } plMetalPipeline::~plMetalPipeline() diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 38b9c4edba..737ce4088f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -67,6 +67,7 @@ plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); descriptor->setDepthAttachmentPixelFormat(MTL::PixelFormatDepth32Float_Stencil8); + descriptor->setSampleCount(pipe->fDevice.fSampleCount); //create the descriptor of the vertex array MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); From 09857368159e0d461362794cf8307bcbe8d6863d Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 11 Apr 2022 22:37:53 -0700 Subject: [PATCH 042/165] Simplifying shader code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Using the power of C++ to move the sampleLayer function into the layers struct. The function can now directly access struct members and doesn’t need them passed as arguments. Having trouble with the array lookup of samplers on Intel graphics and hoping this helps. 
--- .../ShaderSrc/FixedPipelineShaders.metal | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index bd26077343..f47283a9cc 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -209,6 +209,7 @@ typedef struct { sampler sampler2 [[ sampler(1) ]]; sampler sampler3 [[ sampler(2) ]]; sampler sampler4 [[ sampler(3) ]]; + half4 sampleLayer(const size_t index, const uint8_t passType, float3 sampleCoord) const; } FragmentShaderArguments; float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); @@ -481,10 +482,10 @@ float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const ui return sampleCoord.xyz; } -constexpr half4 sampleLayer(uint8_t passType, uint32_t miscFlags, float3 sampleCoord, const thread half4 &color, const thread texture2d &texture, const thread texturecube &cubicTexture, const uint8_t sampleType, const thread sampler* colorSamplers) { +half4 FragmentShaderArguments::sampleLayer(const size_t index, const uint8_t passType, float3 sampleCoord) const { if(passType == PassTypeColor) { - return color; + return colors[index]; } else { /* Not using array based lookup here because the compiler @@ -492,6 +493,7 @@ constexpr half4 sampleLayer(uint8_t passType, uint32_t miscFlags, float3 sampleC with a constant. Using an array based lookup was hurting performance by about 1/3rd on Apple Silicon. 
*/ + size_t sampleType = sampleTypes[index]; sampler colorSampler; if(sampleType == 0) { colorSampler = colorSamplers[0]; @@ -503,15 +505,15 @@ constexpr half4 sampleLayer(uint8_t passType, uint32_t miscFlags, float3 sampleC colorSampler = colorSamplers[3]; } - if (miscFlags & kMiscPerspProjection) { + if (miscFlags[index] & kMiscPerspProjection) { sampleCoord.xy /= sampleCoord.z; } //do the actual sample if(passType == PassTypeTexture) { - return texture.sample(colorSampler, sampleCoord.xy); + return (&textures)[index].sample(colorSampler, sampleCoord.xy); } else if(passType == PassTypeCubicTexture) { - return cubicTexture.sample(colorSampler, sampleCoord.xyz); + return (&cubicTextures)[index].sample(colorSampler, sampleCoord.xyz); } else { return half4(0); } @@ -540,10 +542,8 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], for(size_t layer=0; layer Date: Mon, 11 Apr 2022 23:01:45 -0700 Subject: [PATCH 043/165] Fixing samplers Was using the wrong samplers --- .../ShaderSrc/FixedPipelineShaders.metal | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index f47283a9cc..6004cdca37 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -205,10 +205,10 @@ typedef struct { texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; const constant half4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; - sampler samplers [[ sampler(0) ]]; - sampler sampler2 [[ sampler(1) ]]; - sampler sampler3 [[ sampler(2) ]]; - sampler sampler4 [[ sampler(3) ]]; + sampler 
repeatSampler [[ sampler(0) ]]; + sampler clampRepeatSampler [[ sampler(1) ]]; + sampler repeatClampSampler [[ sampler(2) ]]; + sampler clampSampler [[ sampler(3) ]]; half4 sampleLayer(const size_t index, const uint8_t passType, float3 sampleCoord) const; } FragmentShaderArguments; @@ -496,13 +496,13 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const uint8_t pas size_t sampleType = sampleTypes[index]; sampler colorSampler; if(sampleType == 0) { - colorSampler = colorSamplers[0]; + colorSampler = repeatSampler; } else if(sampleType == 1) { - colorSampler = colorSamplers[1]; + colorSampler = clampRepeatSampler; } else if(sampleType == 2) { - colorSampler = colorSamplers[2]; + colorSampler = repeatClampSampler; } else if(sampleType == 3) { - colorSampler = colorSamplers[3]; + colorSampler = clampSampler; } if (miscFlags[index] & kMiscPerspProjection) { From b18d6a087b291bc9aa7d9e7984eaaac3d9e8ac9d Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 14 Apr 2022 20:56:42 -0700 Subject: [PATCH 044/165] Moving drawable pass back down to single pass Adding a clear shader to clear depth/color as needed without starting a new pass --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + .../pfMetalPipeline/ShaderSrc/Clear.metal | 86 ++++++++++++ .../pfMetalPipeline/plMetalDevice.cpp | 83 +++++++----- .../pfMetalPipeline/plMetalPipelineState.cpp | 30 +++-- .../pfMetalPipeline/plMetalPipelineState.h | 124 +++++++++++++++--- 5 files changed, 266 insertions(+), 58 deletions(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index fa91461465..7266ef502e 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -129,6 +129,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal 
../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal new file mode 100644 index 0000000000..5bc3c0cbb4 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal @@ -0,0 +1,86 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. 
Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +constant const bool ShouldClearDepth [[ function_constant(0) ]]; +constant const bool ShouldClearColor [[ function_constant(1) ]]; + +struct ClearVertexIn +{ + float2 position [[ attribute(0) ]]; +}; + +struct ClearVertexOut +{ + float4 position [[ position ]]; +}; + +struct ClearFragmentOut +{ + float depth [[depth(any), function_constant(ShouldClearDepth)]]; + half4 color [[color(0), function_constant(ShouldClearColor)]]; +}; + +vertex ClearVertexOut clearVertex(ClearVertexIn in [[ stage_in ]]) +{ + ClearVertexOut out; + // Just pass the position through. We're clearing in NDC space. + out.position = float4(in.position, 0.5, 1.0); + return out; +} + +fragment ClearFragmentOut clearFragment( + constant half4& clearColor [[ buffer(0), function_constant(ShouldClearColor) ]], + constant float& clearDepth [[ buffer(1), function_constant(ShouldClearDepth) ]] + ) +{ + ClearFragmentOut out; + if(ShouldClearDepth) { + out.depth = clearDepth; + } + if(ShouldClearColor) { + out.color = clearColor; + } + return out; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index cbd51942a3..e6514970f8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -195,26 +195,47 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh //draw and clear at the same time. So if it's a clear for the //current drawable, remember that and perform the clear when //we're actually drawing to screen. 
- if (shouldClearColor) { - if (fCurrentRenderTarget) { - fClearRenderTargetColor = clearColor; - fShouldClearRenderTarget = shouldClearColor; - if (shouldClearDepth) { - fClearRenderTargetDepth = clearDepth; - } - } else { - fClearDrawableColor = clearColor; - fShouldClearDrawable = shouldClearColor; - if (shouldClearDepth) { - fClearDrawableDepth = clearDepth; - } - } - } if (fCurrentRenderTargetCommandEncoder) { - fCurrentRenderTargetCommandEncoder->endEncoding(); - fCurrentRenderTargetCommandEncoder->release(); - fCurrentRenderTargetCommandEncoder = nil; + half4 halfClearColor; + halfClearColor[0] = clearColor.r; + halfClearColor[1] = clearColor.g; + halfClearColor[2] = clearColor.b; + halfClearColor[3] = clearColor.a; + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalClearPipelineState(this, shouldClearColor, shouldClearDepth).GetRenderPipelineState(); + + const MTL::RenderPipelineState *pipelineState = linkedPipeline->pipelineState; + CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); + + float clearCoords[8] = { + -1, -1, + 1, -1, + -1, 1, + 1, 1 + }; + float clearDepth = 1.0f; + CurrentRenderCommandEncoder()->setDepthStencilState(fNoZReadStencilState); + + CurrentRenderCommandEncoder()->setVertexBytes(&clearCoords, sizeof(clearCoords), 0); + CurrentRenderCommandEncoder()->setFragmentBytes(&halfClearColor, sizeof(halfClearColor), 0); + CurrentRenderCommandEncoder()->setFragmentBytes(&clearDepth, sizeof(float), 1); + CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + } else { + if (shouldClearColor) { + if (fCurrentRenderTarget) { + fClearRenderTargetColor = clearColor; + fShouldClearRenderTarget = shouldClearColor; + if (shouldClearDepth) { + fClearRenderTargetDepth = clearDepth; + } + } else { + fClearDrawableColor = clearColor; + fShouldClearDrawable = shouldClearColor; + if (shouldClearDepth) { + fClearDrawableDepth = clearDepth; + } + } + } } } @@ -272,10 
+293,13 @@ void plMetalDevice::BeginNewRenderPass() { renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); + if (fSampleCount == 1) { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); } else { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); + renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); } fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); @@ -286,6 +310,15 @@ void plMetalDevice::BeginNewRenderPass() { void plMetalDevice::SetRenderTarget(plRenderTarget* target) { + /* + If we're being asked to set the render target to the current drawable, + but we're being asked to set the render target to the drawable, don't do anything. + We used to allow starting new passes on the same drawable but that would break + memoryless buffers on Apple Silicon that don't survive between passes. 
+ */ + if((!fCurrentRenderTarget && !target) && fCurrentRenderTargetCommandEncoder) { + return; + } if( fCurrentRenderTargetCommandEncoder ) { //if we have an existing render target, submit it's commands and release it //if we need to come back to this render target, we can always create a new render @@ -1038,20 +1071,6 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; - if (fSampleCount != 1) { - //MSAA is enabled, do the final multisampling resolve pass - - MTL::RenderPassDescriptor *resolvePassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); - resolvePassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); - resolvePassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); - resolvePassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); - resolvePassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); - - MTL::RenderCommandEncoder * resolveCommand = fCurrentCommandBuffer->renderCommandEncoder(resolvePassDescriptor); - resolveCommand->setLabel(NS::MakeConstantString("Resolve Multisampling Pass")); - resolveCommand->endEncoding(); - } - fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->enqueue(); fCurrentCommandBuffer->commit(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 2b4ecafd6e..68c7d3f489 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -51,28 +51,36 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plPipeline/plRenderTarget.h" #include "plMetalDevice.h" -plMetalPipelineState::plMetalPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) +size_t plMetalPipelineState::GetHash() const { + return std::hash()(GetID()); +} + +plMetalPipelineState::plMetalPipelineState(plMetalDevice* device) : fDevice(device) +{ +} + +plMetalRenderSpanPipelineState::plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) +: plMetalPipelineState(device) { fNumUVs = plGBufferGroup::CalcNumUVs(vRef->fFormat); fNumWeights = (vRef->fFormat & plGBufferGroup::kSkinWeightMask) >> 4; fHasSkinIndices = (vRef->fFormat & plGBufferGroup::kSkinIndices); } -void plMetalPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const +void plMetalRenderSpanPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const { ushort numUVs = fNumUVs; constants->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); constants->setConstantValue(&fNumWeights, MTL::DataTypeUChar, FunctionConstantNumWeights); } -size_t plMetalPipelineState::GetHash() const { +size_t plMetalRenderSpanPipelineState::GetHash() const { std::size_t h1 = std::hash()(fNumUVs); std::size_t h2 = std::hash()(fNumWeights); std::size_t h3 = std::hash()(fHasSkinIndices); - std::size_t h4 = std::hash()(GetID()); - return h1 ^ h2 ^ h3 ^ h4; + return h1 ^ h2 ^ h3 ^ plMetalPipelineState::GetHash(); } plMetalDevice::plMetalLinkedPipeline* plMetalPipelineState::GetRenderPipelineState() { @@ -85,13 +93,13 @@ void plMetalPipelineState::PrewarmRenderPipelineState() { plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalMaterialPassDescription &description) -: plMetalPipelineState(device, vRef) { +: plMetalRenderSpanPipelineState(device, vRef) { fPassDescription = description; } void 
plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const { - plMetalPipelineState::GetFunctionConstants(constants); + plMetalRenderSpanPipelineState::GetFunctionConstants(constants); constants->setConstantValue(&fPassDescription.numLayers, MTL::DataTypeUChar, FunctionConstantNumLayers); constants->setConstantValues(&fPassDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); constants->setConstantValues(&fPassDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); @@ -100,13 +108,13 @@ void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstan } size_t plMetalMaterialPassPipelineState::GetHash() const { - std::size_t value = plMetalPipelineState::GetHash(); + std::size_t value = plMetalRenderSpanPipelineState::GetHash(); value ^= fPassDescription.GetHash(); return value; } -void plMetalPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { +void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { int vertOffset = 0; int skinWeightOffset = vertOffset + (sizeof(float) * 3); if(this->fHasSkinIndices) { @@ -146,7 +154,7 @@ void plMetalPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vert vertexDescriptor->layouts()->object(VertexAttributePosition)->setStride(stride); } -void plMetalPipelineState::ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { if (blendMode & hsGMatState::kBlendNoColor) { //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); @@ -352,7 +360,7 @@ void plMetalMaterialPassDescription::Populate(plLayerInterface* layPtr, uint8_t } bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { - return static_cast(&p)->fPassDescription == 
this->fPassDescription; + return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fPassDescription == this->fPassDescription; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index fc613ac6e1..b62627f8d6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -52,14 +52,14 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com class plMetalPipelineState { public: - plMetalPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef); + plMetalPipelineState(plMetalDevice* device); plMetalDevice::plMetalLinkedPipeline* GetRenderPipelineState(); void PrewarmRenderPipelineState(); bool operator==(const plMetalPipelineState& p) const { if ((&p)->GetID() != this->GetID()) { return false; } else { - return p.fNumUVs == fNumUVs && p.fNumWeights == fNumWeights && p.fHasSkinIndices == fHasSkinIndices && IsEqual(p); + return IsEqual(p); } } virtual size_t GetHash() const; @@ -73,11 +73,34 @@ class plMetalPipelineState { virtual const NS::String* GetDescription() = 0; virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; - void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor); + virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) = 0; +protected: + plMetalDevice* fDevice; + virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const = 0; + MTL::FunctionConstantValues* MakeFunctionConstants() { + MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); + this->GetFunctionConstants(constants); + return constants; + } +}; + +class plMetalRenderSpanPipelineState: public plMetalPipelineState { +public: + plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef); + virtual bool IsEqual(const 
plMetalPipelineState &p) const { + const plMetalRenderSpanPipelineState *renderSpanPipelineSate = static_cast(&p); + if( !renderSpanPipelineSate ) { + return false; + } + return renderSpanPipelineSate->fNumUVs == fNumUVs && renderSpanPipelineSate->fNumWeights == fNumWeights && renderSpanPipelineSate->fHasSkinIndices == fHasSkinIndices; + }; + virtual size_t GetHash() const; + + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; + virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor); void ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor *descriptor); protected: - plMetalDevice* fDevice; uint8_t fNumUVs; uint8_t fNumWeights; bool fHasSkinIndices; @@ -101,8 +124,9 @@ struct plMetalMaterialPassDescription { return match; } - size_t GetHash() const { + virtual size_t GetHash() const { std::size_t value = std::hash()(numLayers); + value ^= std::hash()(numLayers); for(int i=0;i<8;i++){ value ^= std::hash()( blendModes[i] ); @@ -135,7 +159,7 @@ struct std::hash } }; -class plMetalMaterialPassPipelineState: public plMetalPipelineState { +class plMetalMaterialPassPipelineState: public plMetalRenderSpanPipelineState { public: plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, const plMetalMaterialPassDescription &description); virtual size_t GetHash() const override; @@ -159,10 +183,10 @@ class plMetalMaterialPassPipelineState: public plMetalPipelineState { plMetalMaterialPassDescription fPassDescription; }; -class plMetalRenderShadowCasterPipelineState: public plMetalPipelineState { +class plMetalRenderShadowCasterPipelineState: public plMetalRenderSpanPipelineState { public: plMetalRenderShadowCasterPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) - : plMetalPipelineState(device, vRef) { + : plMetalRenderSpanPipelineState(device, vRef) { } const MTL::Function* GetVertexFunction(MTL::Library* library) 
override; @@ -178,11 +202,6 @@ class plMetalRenderShadowCasterPipelineState: public plMetalPipelineState { }; virtual uint16_t GetID() const override { return 2; }; - bool IsEqual(const plMetalPipelineState &p) const override { - //nothing to add at this level - return true; - } - virtual plMetalPipelineState* Clone() override { return new plMetalRenderShadowCasterPipelineState(*this); @@ -208,10 +227,10 @@ class plMetalRenderShadowPipelineState: public plMetalMaterialPassPipelineState } }; -class plMetalDynamicMaterialPipelineState: public plMetalPipelineState { +class plMetalDynamicMaterialPipelineState: public plMetalRenderSpanPipelineState { public: plMetalDynamicMaterialPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, uint32_t blendMode, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID) - : plMetalPipelineState(device, vRef), + : plMetalRenderSpanPipelineState(device, vRef), fVertexShaderID(vertexShaderID), fFragmentShaderID(fragmentShaderID), fBlendMode(blendMode) { @@ -264,4 +283,79 @@ struct std::hash } }; +class plMetalClearPipelineState: public plMetalPipelineState { +public: + plMetalClearPipelineState(plMetalDevice *device, bool shouldClearColor, bool shouldClearDepth): + plMetalPipelineState(device) + { + fShouldClearDepth = shouldClearDepth; + fShouldClearColor = shouldClearColor; + } + + virtual bool IsEqual(const plMetalPipelineState &p) const override { + const plMetalClearPipelineState* clearState = static_cast(&p); + if (!clearState) { + return false; + } + return clearState->fShouldClearDepth == fShouldClearDepth && fShouldClearColor == clearState->fShouldClearColor; + }; + + virtual uint16_t GetID() const override { return 4; }; + virtual plMetalPipelineState* Clone() override { + return new plMetalClearPipelineState(*this); + }; + + // + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override { + return library->newFunction(NS::MakeConstantString("clearVertex")); + }; + virtual const 
MTL::Function* GetFragmentFunction(MTL::Library* library) override { + return library->newFunction(NS::MakeConstantString("clearFragment"), + MakeFunctionConstants(), + (NS::Error **)NULL + )->autorelease(); + }; + virtual const NS::String* GetDescription() override { + return NS::MakeConstantString("Clear"); + }; + + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override { + //if (fShouldClearColor) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + //} else { + // descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); + // descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); + //} + }; + + virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override { + vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(0)->setOffset(0); + vertexDescriptor->attributes()->object(0)->setBufferIndex(0); + vertexDescriptor->layouts()->object(0)->setStride(8); + vertexDescriptor->layouts()->object(0)->setStepFunction(MTL::VertexStepFunctionPerVertex); + vertexDescriptor->layouts()->object(0)->setStepRate(1); + }; + + virtual void GetFunctionConstants(MTL::FunctionConstantValues* values) const override { + values->setConstantValue(&fShouldClearDepth, MTL::DataTypeBool, NS::UInteger(0)); + values->setConstantValue(&fShouldClearColor, MTL::DataTypeBool, NS::UInteger(1)); + } + + virtual size_t GetHash() const override { + std::size_t value = plMetalPipelineState::GetHash(); + value ^= std::hash()(fShouldClearColor); + value ^= std::hash()(fShouldClearDepth); + + return value; + } + +private: + + bool fShouldClearColor; + bool fShouldClearDepth; + +}; + #endif /* plMetalPipelineState_hpp */ From 03b9616d47fa51047b4d97056dfa946fc03078cf Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 15 Apr 2022 22:36:10 -0700 Subject: [PATCH 045/165] Fixing rendering 
issues with state selection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shaded material states like water weren’t correctly hashing and testing equality based on span properties --- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index b62627f8d6..3c418fc80b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -246,7 +246,7 @@ class plMetalDynamicMaterialPipelineState: public plMetalRenderSpanPipelineState if (!dynamicState) { return false; } - return dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID && dynamicState->fBlendMode == fBlendMode; + return plMetalRenderSpanPipelineState::IsEqual(p) && dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID && dynamicState->fBlendMode == fBlendMode; } size_t GetHash() const override { @@ -255,7 +255,7 @@ class plMetalDynamicMaterialPipelineState: public plMetalRenderSpanPipelineState value ^= std::hash()(fVertexShaderID); value ^= std::hash()(fBlendMode); - return value; + return value ^ plMetalRenderSpanPipelineState::GetHash(); } const MTL::Function* GetVertexFunction(MTL::Library *library) override; From 6f82663beb01769d5af09fb057e04fc71a4f6db0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 17 Apr 2022 14:21:41 -0700 Subject: [PATCH 046/165] Bug fixes and performance improvements for MSAA MSAA textures are now memoryless. Vertex shader has been improved to reduce memory bandwidth and encoding time. 
--- .../ShaderSrc/FixedPipelineShaders.metal | 68 ++++++++++--------- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 5 +- .../pfMetalPipeline/plMetalDevice.cpp | 8 ++- .../plMetalMaterialShaderRef.cpp | 1 - .../pfMetalPipeline/plMetalPipeline.cpp | 23 +++---- .../pfMetalPipeline/plMetalPipelineState.cpp | 8 +-- 6 files changed, 59 insertions(+), 54 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 6004cdca37..15905f51c0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -209,11 +209,9 @@ typedef struct { sampler clampRepeatSampler [[ sampler(1) ]]; sampler repeatClampSampler [[ sampler(2) ]]; sampler clampSampler [[ sampler(3) ]]; - half4 sampleLayer(const size_t index, const uint8_t passType, float3 sampleCoord) const; + half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; } FragmentShaderArguments; -float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix); - typedef struct { float4 position [[position]]; @@ -226,7 +224,7 @@ typedef struct float3 texCoord7 [[function_constant(hasLayer7)]]; float3 texCoord8 [[function_constant(hasLayer8)]]; //float4 normal; - half4 vtxColor; + half4 vtxColor [[ centroid_perspective ]]; half4 fogColor; //float4 vCamNormal; } ColorInOut; @@ -234,7 +232,7 @@ typedef struct typedef struct { - float4 position [[position]]; + float4 position [[position, invariant]]; float3 texCoord1; } ShadowCasterInOut; @@ -349,21 +347,21 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], const float4 normal = uniforms.worldToCameraMatrix 
* (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); if(hasLayer1) - out.texCoord1 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[0].transform, uniforms.uvTransforms[0].UVWSrc, miscFlags1, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord1 = uniforms.sampleLocation(0, &in.texCoord1, normal, vCamPosition); if(hasLayer2) - out.texCoord2 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[1].transform, uniforms.uvTransforms[1].UVWSrc, miscFlags2, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord2 = uniforms.sampleLocation(1, &in.texCoord1, normal, vCamPosition); if(hasLayer3) - out.texCoord3 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[2].transform, uniforms.uvTransforms[2].UVWSrc, miscFlags3, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord3 = uniforms.sampleLocation(2, &in.texCoord1, normal, vCamPosition); if(hasLayer4) - out.texCoord4 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[3].transform, uniforms.uvTransforms[3].UVWSrc, miscFlags4, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord4 = uniforms.sampleLocation(3, &in.texCoord1, normal, vCamPosition); if(hasLayer5) - out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[4].transform, uniforms.uvTransforms[4].UVWSrc, miscFlags5, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord5 = uniforms.sampleLocation(4, &in.texCoord1, normal, vCamPosition); if(hasLayer6) - out.texCoord5 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[5].transform, uniforms.uvTransforms[5].UVWSrc, miscFlags6, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord5 = uniforms.sampleLocation(5, &in.texCoord1, normal, vCamPosition); if(hasLayer7) - out.texCoord7 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[6].transform, 
uniforms.uvTransforms[6].UVWSrc, miscFlags7, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord7 = uniforms.sampleLocation(6, &in.texCoord1, normal, vCamPosition); if(hasLayer8) - out.texCoord8 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[7].transform, uniforms.uvTransforms[7].UVWSrc, miscFlags8, normal, vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + out.texCoord8 = uniforms.sampleLocation(7, &in.texCoord1, normal, vCamPosition); out.position = uniforms.projectionMatrix * vCamPosition; @@ -373,10 +371,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constexpr void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags); constexpr void blend(half4 srcSample, thread half4 &destSample, uint32_t blendFlags); -float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const uint UVWSrc, const uint flags, const float4 normal, const float4 camPosition, const matrix_float4x4 camToWorldMatrix, const matrix_float4x4 projectionMatrix) { +float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, const float4 normal, const float4 camPosition) constant { + const uint32_t UVWSrc = uvTransforms[index].UVWSrc; + float4x4 matrix = uvTransforms[index].transform; + const uint32_t flags = miscFlags[index]; //Note: If we want to require newer versions of Metal/newer hardware we could pass function pointers instead of doing these ifs. 
if (flags & (kMiscUseReflectionXform | kMiscUseRefractionXform)) { - matrix = camToWorldMatrix; + matrix = cameraToWorldMatrix; matrix[3][0] = matrix[3][1] = matrix[3][2] = 0; // This is just a rotation about X of Pi/2 (y = z, z = -y), @@ -444,7 +445,7 @@ float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const ui // Multiply by the projection matrix matrix = matrix * projectionMatrix; } else if (flags & kMiscProjection) { - matrix_float4x4 cam2World = camToWorldMatrix; + matrix_float4x4 cam2World = cameraToWorldMatrix; if( !(UVWSrc & kUVWPosition) ) { cam2World.columns[3][0] = 0; cam2World.columns[3][1] = 0; @@ -482,10 +483,10 @@ float3 sampleLocation(thread float3 *texCoords, matrix_float4x4 matrix, const ui return sampleCoord.xyz; } -half4 FragmentShaderArguments::sampleLayer(const size_t index, const uint8_t passType, float3 sampleCoord) const { +half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const { if(passType == PassTypeColor) { - return colors[index]; + return vertexColor; } else { /* Not using array based lookup here because the compiler @@ -543,7 +544,7 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], float3 sampleCoord = (&in.texCoord1)[layer]; - color = fragmentShaderArgs.sampleLayer(layer, sourceTypes[layer], sampleCoord); + color = fragmentShaderArgs.sampleLayer(layer, in.vtxColor, sourceTypes[layer], sampleCoord); if(layer==0) { blendFirst(color, currentColor, blendModes[layer]); @@ -618,12 +619,16 @@ constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t b srcSample.a = srcSample.a; } - if (blendFlags & kBlendAlphaAdd) { - // alpha = alphaVal + prev - destSample.a = srcSample.a + destSample.a; - } else if (blendFlags & kBlendAlphaMult) { - // alpha = alphaVal * prev - destSample.a = srcSample.a * destSample.a; + switch( blendFlags & ( kBlendAlphaAdd | kBlendAlphaMult ) ) { + case 0: + destSample.a = 
destSample.a; + break; + case kBlendAlphaAdd: + destSample.a = srcSample.a + destSample.a; + break; + case kBlendAlphaMult: + destSample.a = srcSample.a * destSample.a; + break; } break; } @@ -682,8 +687,9 @@ vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); - //out.texCoord1 = (uniforms.uvTransforms[0].transform * vCamPosition).xyz; - out.texCoord1 = sampleLocation(&in.texCoord1, uniforms.uvTransforms[0].transform, uniforms.uvTransforms[0].UVWSrc, 0, float4(0.0), vCamPosition, uniforms.cameraToWorldMatrix, uniforms.projectionMatrix); + float4x4 matrix = uniforms.uvTransforms[0].transform * uniforms.cameraToWorldMatrix; + + out.texCoord1 = (vCamPosition * matrix).xyz; out.position = uniforms.projectionMatrix * vCamPosition; @@ -719,15 +725,15 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], currentColor.a = LUTColor.a - currentColor.a; //only possible alpha sources are layers 0 or 1 - if(alphaSrc == 0) { + if(alphaSrc == 0 && passCount > 0) { - half4 layerColor = layers.sampleLayer(0, sourceTypes[2], in.texCoord3); + half4 layerColor = layers.sampleLayer(0, in.vtxColor,sourceTypes[2], in.texCoord3); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; - } else if(alphaSrc == 1) { + } else if(alphaSrc == 1 && passCount > 1) { - half4 layerColor = layers.sampleLayer(1, sourceTypes[2], in.texCoord3); + half4 layerColor = layers.sampleLayer(1, in.vtxColor, sourceTypes[2], in.texCoord3); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index f942fa63d8..b6037b678a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -130,7 +130,6 @@ 
struct plMetalShaderLightSource { typedef struct { uint32_t UVWSrc; - uint32_t flags; matrix_float4x4 transform; } UVOutDescriptor; @@ -139,7 +138,6 @@ typedef struct //transformation matrix_float4x4 projectionMatrix; matrix_float4x4 localToWorldMatrix; - matrix_float4x4 worldToLocalMatrix; matrix_float4x4 cameraToWorldMatrix; matrix_float4x4 worldToCameraMatrix; @@ -163,6 +161,9 @@ typedef struct uint8_t numUVSrcs; UVOutDescriptor uvTransforms[8]; +#ifdef __METAL_VERSION__ + float3 sampleLocation(size_t index, thread float3 *texCoords, const float4 normal, const float4 camPosition) constant; +#endif } VertexUniforms; #endif /* ShaderTypes_h */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index e6514970f8..e3639811f6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -293,13 +293,13 @@ void plMetalDevice::BeginNewRenderPass() { renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); - renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); if (fSampleCount == 1) { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); } else { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); } fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); @@ -916,7 +916,11 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) drawable->texture()->height(), false); 
msaaColorTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); - msaaColorTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { + msaaColorTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + msaaColorTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } msaaColorTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); msaaColorTextureDescriptor->setSampleCount(fSampleCount); fCurrentFragmentMSAAOutputTexture = fMetalDevice->newTexture(msaaColorTextureDescriptor); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 52c77bdf59..8f69d1fa1f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -228,7 +228,6 @@ void plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDes matrix_float4x4 tXfm; hsMatrix2SIMD(layer->GetTransform(), &tXfm); transform->transform = tXfm; - transform->flags = layer->GetMiscFlags(); transform->UVWSrc = layer->GetUVWSrc(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 3bc9bbdd18..0087a7b22b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -970,7 +970,6 @@ void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& fCurrentRenderPassUniforms->worldToCameraMatrix = fDevice.fMatrixW2C; fCurrentRenderPassUniforms->cameraToWorldMatrix = fDevice.fMatrixC2W; fCurrentRenderPassUniforms->localToWorldMatrix = fDevice.fMatrixL2W; - fCurrentRenderPassUniforms->worldToLocalMatrix = fDevice.fMatrixW2L; } void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, 
hsGDeviceRef* vb, @@ -1084,7 +1083,6 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, if( fShadows.size() ) { //if we had to render aux spans, we probably changed the vertex and index buffer //reset those - fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); @@ -1823,9 +1821,9 @@ bool plMetalPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup //MTL::PurgeableState bufferState = vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateNonVolatile); if (vRef->Expired(fVtxRefTime)) { IRefreshDynVertices(group, vRef); - fDevice.GetCurrentCommandBuffer()->addCompletedHandler( ^(MTL::CommandBuffer *buffer) { + //fDevice.GetCurrentCommandBuffer()->addCompletedHandler( ^(MTL::CommandBuffer *buffer) { //vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); - }); + //}); } if (iRef->IsDirty()) { @@ -2390,7 +2388,6 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) uniforms.projectionMatrix = projMat; matrix_float4x4 modelMatrix; uniforms.worldToCameraMatrix = modelMatrix; - uniforms.uvTransforms[0].flags = 0; uniforms.uvTransforms[0].UVWSrc = 0; uniforms.numUVSrcs = 1; //uniforms.worldToLocalMatrix = fDevice.fMatrixW2L; @@ -3162,7 +3159,6 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) hsMatrix2SIMD(castLUT, &tXfm); fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; - fCurrentRenderPassUniforms->uvTransforms[0].flags = 0; fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition; /*DWORD clearColor = 0xff000000L; @@ -3679,7 +3675,12 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fCurrentPipelineState = linkedPipeline->pipelineState; } - 
fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + + if (fCurrentVertexBuffer != vRef->GetBuffer()) { + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fCurrentVertexBuffer = vRef->GetBuffer(); + } + fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); @@ -3740,10 +3741,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con ISetupShadowLight(fShadows[i]); struct plMetalMaterialPassDescription passDescription; - memset(&passDescription.miscFlags, 0, sizeof(passDescription.miscFlags)); - memset(&passDescription.blendModes, 0, sizeof(passDescription.blendModes)); - memset(&passDescription.passTypes, 0, sizeof(passDescription.passTypes)); - memset(&passDescription.sampleTypes, 0, sizeof(passDescription.sampleTypes)); + memset(&passDescription, 0, sizeof(passDescription)); passDescription.Populate(mat->GetLayer(0), 2); passDescription.numLayers = 3; if (mat->GetNumLayers()>1) { @@ -3862,7 +3860,6 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) // Normal UVW source. 
fCurrentRenderPassUniforms->uvTransforms[2].UVWSrc = uvwSrc; // MiscFlags to layer's misc flags - fCurrentRenderPassUniforms->uvTransforms[2].flags = layer->GetMiscFlags(); matrix_float4x4 tXfm; hsMatrix2SIMD(layer->GetTransform(), &tXfm); fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm; @@ -3946,7 +3943,6 @@ void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) hsMatrix2SIMD(cameraToTexture, &tXfm); fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition; - fCurrentRenderPassUniforms->uvTransforms[0].flags = 0; fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; // Stage 1: the lut @@ -3955,7 +3951,6 @@ void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) hsMatrix2SIMD(cameraToLut, &tXfm); fCurrentRenderPassUniforms->uvTransforms[1].UVWSrc = plLayerInterface::kUVWPosition; - fCurrentRenderPassUniforms->uvTransforms[1].flags = 0; fCurrentRenderPassUniforms->uvTransforms[1].transform = tXfm; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 68c7d3f489..30083ce875 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -252,8 +252,8 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); /*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); 
descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); @@ -383,7 +383,7 @@ const MTL::Function* plMetalRenderShadowCasterPipelineState::GetVertexFunction(M { NS::Error* error = nullptr; MTL::Function* function = library->newFunction( - NS::String::string("shadowVertexShader", NS::ASCIIStringEncoding), + NS::MakeConstantString("shadowVertexShader"), MakeFunctionConstants(), &error )->autorelease(); @@ -394,7 +394,7 @@ const MTL::Function* plMetalRenderShadowCasterPipelineState::GetFragmentFunction { NS::Error* error = nullptr; MTL::Function* function = library->newFunction( - NS::String::string("shadowFragmentShader", NS::ASCIIStringEncoding), + NS::MakeConstantString("shadowFragmentShader"), MakeFunctionConstants(), &error )->autorelease(); From 2e76abe61029a78f12b0955115850aab441ec39c Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 17 Apr 2022 14:48:33 -0700 Subject: [PATCH 047/165] Fixing bug with aux spans and vertex buffers --- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 0087a7b22b..ccc3484a25 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1066,8 +1066,13 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, IPopPiggyBacks(); // Render any aux spans associated. 
- if( span.GetNumAuxSpans() ) + if( span.GetNumAuxSpans() ) { IRenderAuxSpans(span); + + //aux spans will change the current vertex buffer, put ours back + fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); + fCurrentVertexBuffer = vRef->GetBuffer(); + } @@ -1088,10 +1093,9 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, IRenderShadowsOntoSpan(render, &span, material, vRef); } - - if( !(fView.fRenderState & kRenderNoProjection) ) - { - } + } + + if ( span.GetNumAuxSpans() || (pass >= 0 && fShadows.size()) ) { } #ifdef _DEBUG From 502ac986beb176fb371134410723a08621c75838 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 17 Apr 2022 15:02:29 -0700 Subject: [PATCH 048/165] Eliminating preshade colors from fragment pass Also getting rid of the buffers and arguments used to back it. They were only used for lighting, which is in the vertex pass. --- .../ShaderSrc/FixedPipelineShaders.metal | 1 - .../ShaderSrc/PlateShaders.metal | 2 -- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 1 - .../plMetalMaterialShaderRef.cpp | 26 +++---------------- .../plMetalMaterialShaderRef.h | 3 +-- 5 files changed, 5 insertions(+), 28 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 15905f51c0..779ee3ce53 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -203,7 +203,6 @@ typedef struct { texturecube cubicTexture6 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 5), function_constant(hasLayer6) ]]; texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasLayer7) ]]; texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; - const 
constant half4* colors [[ buffer(FragmentShaderArgumentAttributeColors) ]]; const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; sampler repeatSampler [[ sampler(0) ]]; sampler clampRepeatSampler [[ sampler(1) ]]; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal index db6e2953b3..898d7d35dd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal @@ -55,8 +55,6 @@ using namespace metal; typedef struct { array, 8> textures [[ id(FragmentShaderArgumentAttributeTextures) ]]; array, 8> cubicTextures [[ id(FragmentShaderArgumentAttributeCubicTextures) ]]; - array colors [[ id(FragmentShaderArgumentAttributeColors) ]]; - plMetalFragmentShaderArgumentBuffer uniforms [[ id(FragmentShaderArgumentAttributeUniforms) ]]; } FragmentShaderArguments; typedef struct diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index b6037b678a..0e8d8d2fe1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -110,7 +110,6 @@ struct plMetalShadowCastFragmentShaderArgumentBuffer { enum plMetalFragmentShaderTextures { FragmentShaderArgumentAttributeTextures = 0, FragmentShaderArgumentAttributeCubicTextures = 8, - FragmentShaderArgumentAttributeColors = 16, FragmentShaderArgumentAttributeUniforms = 32 }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 8f69d1fa1f..86e7875d5f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -85,12 +85,6 @@ void plMetalMaterialShaderRef::Release() } fPassArgumentBuffers.clear(); - for(auto & buffer : fPassColors) { - buffer->release(); - buffer = nil; - } - fPassColors.clear(); - fNumPasses = 0; } @@ -137,10 +131,9 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en assert(i - GetPassIndex(pass) >= 0); EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); - IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer, nullptr); + IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer); } - encoder->setFragmentBuffer(fPassColors[pass], 0, FragmentShaderArgumentAttributeColors); encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, BufferIndexFragArgBuffer); } @@ -153,13 +146,12 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode vertexUniforms->numUVSrcs += piggyBacks->size(); } - simd_float4 colorMap[8]; plMetalFragmentShaderArgumentBuffer uniforms; IHandleMaterial(GetPassIndex(pass), &uniforms, piggyBacks, [&](plLayerInterface* layer, uint32_t index) { layer = preEncodeTransform(layer, index); - IBuildLayerTexture(encoder, index, layer, colorMap); + IBuildLayerTexture(encoder, index, layer); return layer; }, [&](plLayerInterface* layer, uint32_t index) { layer = postEncodeTransform(layer, index); @@ -220,7 +212,6 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode } } - encoder->setFragmentBytes(colorMap, sizeof(colorMap), FragmentShaderArgumentAttributeColors); encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), BufferIndexFragArgBuffer); } @@ -280,7 +271,6 @@ void plMetalMaterialShaderRef::ILoopOverLayers() //I'd like to encode more data here, and use a heap. The heap hasn't happened yet because heaps are //private memory, and we don't have a window yet for a blit phase into private memory. 
MTL::Buffer *argumentBuffer = fDevice->newBuffer(sizeof(plMetalFragmentShaderArgumentBuffer), MTL::ResourceStorageModeManaged); - MTL::Buffer *colorBuffer = fDevice->newBuffer(sizeof(simd_float4) * 8, MTL::ResourceStorageModeManaged); plMetalFragmentShaderArgumentBuffer *layerBuffer = (plMetalFragmentShaderArgumentBuffer *)argumentBuffer->contents(); @@ -299,14 +289,12 @@ void plMetalMaterialShaderRef::ILoopOverLayers() //encode the colors for this pass into our buffer for fast rendering for(int colorToEncode = 0; colorToEncode < j - iCurrMat; colorToEncode ++) { - IBuildLayerTexture(NULL, colorToEncode, fMaterial->GetLayer(iCurrMat + colorToEncode), (simd_float4*) colorBuffer->contents()); + IBuildLayerTexture(NULL, colorToEncode, fMaterial->GetLayer(iCurrMat + colorToEncode)); } argumentBuffer->didModifyRange(NS::Range(0, argumentBuffer->length())); - colorBuffer->didModifyRange(NS::Range(0, colorBuffer->length())); fPassArgumentBuffers.push_back(argumentBuffer); - fPassColors.push_back(colorBuffer); fPassIndices.push_back(iCurrMat); fPassLengths.push_back(j - iCurrMat); @@ -325,7 +313,7 @@ const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerIn return state; } -void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap) +void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer) { fPipeline->CheckTextureRef(layer); plBitmap* texture = layer->GetTexture(); @@ -339,12 +327,6 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer); } - } else { - hsColorRGBA preshadeColor = layer->GetPreshadeColor(); - colorMap[offsetFromRootLayer].r = preshadeColor.r; - colorMap[offsetFromRootLayer].g = preshadeColor.g; - 
colorMap[offsetFromRootLayer].b = preshadeColor.b; - colorMap[offsetFromRootLayer].a = preshadeColor.a; } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 0eb7e765d4..d97ba131f5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -64,7 +64,6 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef //FIXME: This should be retained/released MTL::Device* fDevice; std::vector fPassArgumentBuffers; - std::vector fPassColors; public: void Link(plMetalMaterialShaderRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } @@ -106,7 +105,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef bool ICanEatLayer(plLayerInterface* lay); uint32_t ILayersAtOnce(uint32_t which); - void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer, simd_float4 *colorMap); + void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); }; From 50b301a8dd67683b98411ef780bfca85cf47eec2 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 17 Apr 2022 22:33:08 -0700 Subject: [PATCH 049/165] Implementing offscreen render support Ki screenshotting no longer means death --- .../pfMetalPipeline/plMetalDevice.cpp | 8 ++ .../pfMetalPipeline/plMetalPipeline.cpp | 82 +++++++++++++++++-- 2 files changed, 83 insertions(+), 7 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index e3639811f6..7d05d0dce5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -331,6 +331,11 @@ void 
plMetalDevice::SetRenderTarget(plRenderTarget* target) if( fCurrentOffscreenCommandBuffer ) { fCurrentOffscreenCommandBuffer->enqueue(); fCurrentOffscreenCommandBuffer->commit(); + if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) { + //if it's an offscreen buffer, wait for completion + //something is probably going to want to syncronously grab data + fCurrentOffscreenCommandBuffer->waitUntilCompleted(); + } fCurrentOffscreenCommandBuffer->release(); fCurrentOffscreenCommandBuffer = nil; } @@ -345,6 +350,9 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) } if(fCurrentRenderTarget) { + if(!target->GetDeviceRef()) { + fPipeline->MakeRenderTargetRef(target); + } plMetalRenderTargetRef *deviceTarget= (plMetalRenderTargetRef *)target->GetDeviceRef(); fCurrentOffscreenCommandBuffer = fCommandQueue->commandBuffer(); fCurrentOffscreenCommandBuffer->retain(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index ccc3484a25..352dddbb49 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -546,10 +546,43 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) // Offscreen isn't currently used for anything. 
else if (owner->GetFlags() & plRenderTarget::kIsOffscreen) { /// Create a blank surface - //if (ref) - // ref->Set(surfFormat, 0, owner); - //else - // ref = new plGLRenderTargetRef(surfFormat, 0, owner); + + if (!ref) + ref = new plMetalRenderTargetRef(); + + MTL::TextureDescriptor *textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setWidth(owner->GetWidth()); + textureDescriptor->setHeight(owner->GetHeight()); + textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + textureDescriptor->setStorageMode(MTL::StorageModeShared); + + plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef(); + MTL::Texture * texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + textureDescriptor->release(); + + //if the ref already has an old texture, release it + if(ref->fTexture) + ref->fTexture->release(); + if(ref->fDepthBuffer) + ref->fDepthBuffer->release(); + ref->fTexture = texture; + ref->fDepthBuffer = depthBuffer; + ref->fOwner = owner; + + // Keep it in a linked list for ready destruction. + if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + + return ref; } // Keep it in a linked list for ready destruction. 
@@ -792,9 +825,44 @@ bool plMetalPipeline::CaptureScreen(plMipmap *dest, bool flipVertical, uint16_t plMipmap *plMetalPipeline::ExtractMipMap(plRenderTarget *targ) { - //FIXME: Add mip map extraction - //find who calls this to test - return nullptr; + if( plCubicRenderTarget::ConvertNoRef(targ) ) + return nullptr; + + if( targ->GetPixelSize() != 32 ) + { + hsAssert(false, "Only RGBA8888 currently implemented"); + return nullptr; + } + + plMetalRenderTargetRef* ref = (plMetalRenderTargetRef*)targ->GetDeviceRef(); + if( !ref ) + return nullptr; + + const int width = targ->GetWidth(); + const int height = targ->GetHeight(); + + plMipmap* mipMap = new plMipmap(width, height, plMipmap::kARGB32Config, 1); + + uint8_t* ptr = (uint8_t*)(ref->fTexture->buffer()->contents()); + const int pitch = ref->fTexture->width() * 4; + + ref->fTexture->getBytes(mipMap->GetAddr32(0, 0), pitch, MTL::Region(0, 0, width, height), 0); + + const uint32_t blackOpaque = 0xff000000; + int y; + for( y = 0; y < height; y++ ) + { + uint32_t* destPtr = mipMap->GetAddr32(0, y); + uint32_t* srcPtr = (uint32_t*)destPtr; + int x; + for( x = 0; x < width; x++ ) + { + destPtr[x] = srcPtr[x] | blackOpaque; + } + ptr += pitch; + } + + return mipMap; } void plMetalPipeline::GetSupportedDisplayModes(std::vector *res, int ColorDepth) From fd7c09125c068494e38c4c916c4ba331bb4be2c2 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 18 Apr 2022 22:14:14 -0700 Subject: [PATCH 050/165] Fix for KI snapshots on discrete cards Texture needs to be blitted out to CPU for discrete cards. 
--- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 6 ++++++ .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 7d05d0dce5..ea3bf4d6c2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -329,6 +329,12 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) } if( fCurrentOffscreenCommandBuffer ) { + if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) { + //if our target was offscreen, go ahead and blit back. Something will want this data. + MTL::BlitCommandEncoder* blitEncoder = fCurrentOffscreenCommandBuffer->blitCommandEncoder(); + blitEncoder->synchronizeResource(fCurrentFragmentOutputTexture); + blitEncoder->endEncoding(); + } fCurrentOffscreenCommandBuffer->enqueue(); fCurrentOffscreenCommandBuffer->commit(); if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 352dddbb49..583918fe99 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -555,7 +555,7 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) textureDescriptor->setHeight(owner->GetHeight()); textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); - textureDescriptor->setStorageMode(MTL::StorageModeShared); + textureDescriptor->setStorageMode(MTL::StorageModeManaged); plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef(); MTL::Texture * texture = 
fDevice.fMetalDevice->newTexture(textureDescriptor); From 6d4fc83edec7d113834eeb4fb10ea1c2af047bc8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 18 Apr 2022 22:15:25 -0700 Subject: [PATCH 051/165] Fixes for cubic target accidental recreation After a resolution change cubic render targets are always marked as dirty. This causes problems with the KI snapshot renders which will try to recreate the buffer and not re-render it. --- .../pfMetalPipeline/plMetalPipeline.cpp | 58 ++++++++----------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 583918fe99..bb8eca8a03 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -446,38 +446,37 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) else { fRef = new plMetalRenderTargetRef(); - fRef->SetDirty(true); face->SetDeviceRef(fRef); ( (plMetalRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList ); // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) hsRefCnt_SafeUnRef( face->GetDeviceRef() ); } - if(fRef->IsDirty()) { - static const uint kFaceMapping[] = { - 1, // kLeftFace - 0, // kRightFace - 4, // kFrontFace - 5, // kBackFace - 2, // kTopFace - 3 // kBottomFace - }; - - if(fRef->fTexture) { - fRef->fTexture->release(); - fRef->fTexture = nullptr; - } - - if(fRef->fDepthBuffer) { - fRef->fDepthBuffer->release(); - fRef->fDepthBuffer = nullptr; - } - - fRef->fTexture = texture->newTextureView(MTL::PixelFormatBGRA8Unorm, MTL::TextureType2D, NS::Range::Make(0, 1), NS::Range::Make(kFaceMapping[i], 1)); - //in since the depth buffer is shared each render target gets their own retain - fRef->fDepthBuffer = depthBuffer->retain(); - fRef->SetDirty(false); + + //in since the root texture has changed reload 
all the face textures + static const uint kFaceMapping[] = { + 1, // kLeftFace + 0, // kRightFace + 4, // kFrontFace + 5, // kBackFace + 2, // kTopFace + 3 // kBottomFace + }; + + if(fRef->fTexture) { + fRef->fTexture->release(); + fRef->fTexture = nullptr; } + + if(fRef->fDepthBuffer) { + fRef->fDepthBuffer->release(); + fRef->fDepthBuffer = nullptr; + } + + fRef->fTexture = texture->newTextureView(MTL::PixelFormatBGRA8Unorm, MTL::TextureType2D, NS::Range::Make(0, 1), NS::Range::Make(kFaceMapping[i], 1)); + //in since the depth buffer is shared each render target gets their own retain + fRef->fDepthBuffer = depthBuffer->retain(); + fRef->SetDirty(false); } //if the ref already has an old texture, release it @@ -500,6 +499,7 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) if (ref != nullptr && !ref->IsLinked()) ref->Link(&fRenderTargetRefList); } + ref->SetDirty(false); return ref; } @@ -2932,14 +2932,6 @@ void plMetalPipeline::IReleaseDynDeviceObjects() rtRef->Release(); rtRef->Unlink(); } - - //FIXME: Materials wouldn't normally be dynamic resources. But... the buffers can reference render targets which we are swapping. Might be able to fix this if shader references aren't encoded into the material. Piggybacks already aren't included, and it's complicating the fragment shader. So it might be better just to directly load texture references. - while( fMatRefList ) - { - plMetalMaterialShaderRef* matRef = fMatRefList; - matRef->Release(); - matRef->Unlink(); - } // The shared dynamic vertex buffers used by things like objects skinned on CPU, or // particle systems. From 008dfb862c46522c08558c75dde3932ee16a4479 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 23 Apr 2022 14:15:55 -0700 Subject: [PATCH 052/165] Fixing not using correct number of piggybacks This was causing a Metal compiler crash because inconsistent function constants were being passed. 
--- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index bb8eca8a03..67e7680b48 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1509,7 +1509,11 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons mRef->GetSampleTypeArray(sampleTypes, pass); } else { - mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fPiggyBackStack, + //Plasma pulls piggybacks from the rear first, pull the number of active piggybacks + auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks; + auto lastPiggyback = fPiggyBackStack.end(); + std::vector subPiggybacks(firstPiggyback, lastPiggyback); + mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &subPiggybacks, [&](plLayerInterface* layer, uint32_t index){ if(index==0) { layer = IPushOverBaseLayer(layer); From 7122c024597a0f28ddeac309bd4066f87e1885d1 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 23 Apr 2022 14:16:37 -0700 Subject: [PATCH 053/165] Fixing graphics options not working MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to supply at least one display mode. This isn’t perfect - it causes graphics options to set the plate/cursor layer to 800x600. But it’s better. 
--- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 67e7680b48..116583b913 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -878,6 +878,15 @@ void plMetalPipeline::GetSupportedDisplayModes(std::vector *res, Ideally we should support some sort of scaling/semi dynamic renderbuffer resolution thing. But don't mess with the window servers framebuffer size. macOS has accelerated resolution scaling like consoles do. Use that. */ + + std::vector supported; + plDisplayMode mode; + mode.Width = 800; + mode.Height = 600; + mode.ColorDepth = 32; + supported.push_back(mode); + + *res = supported; } int plMetalPipeline::GetMaxAnisotropicSamples() From 0df7b78a538476ce082f44427385d79161a79d5e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 24 Apr 2022 17:11:18 -0700 Subject: [PATCH 054/165] Implementing post processing/gamma --- .../pfMetalPipeline/plMetalDevice.cpp | 81 +++++++++++++++++-- .../pfMetalPipeline/plMetalDevice.h | 10 +++ .../pfMetalPipeline/plMetalPipeline.cpp | 65 ++++++++++++++- 3 files changed, 146 insertions(+), 10 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index ea3bf4d6c2..892544ceef 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -298,7 +298,15 @@ void plMetalDevice::BeginNewRenderPass() { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); } else { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); - 
renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); + + //if we need postprocessing, output to the main pass texture + //otherwise we can go straight to the drawable + if (NeedsPostprocessing()) { + renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentUnprocessedOutputTexture); + } else { + renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); + } + renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); } @@ -387,7 +395,10 @@ plMetalDevice::plMetalDevice() fCurrentOffscreenCommandBuffer(nullptr), fCurrentRenderTarget(nullptr), fNewPipelineStateMap(), - fCurrentFragmentMSAAOutputTexture(nullptr) + fCurrentFragmentMSAAOutputTexture(nullptr), + fCurrentUnprocessedOutputTexture(nullptr), + fGammaLUTTexture(nullptr), + fGammaAdjustState(nullptr) { fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; @@ -425,6 +436,8 @@ plMetalDevice::plMetalDevice() fReverseZStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); depthDescriptor->release(); + + CreateGammaAdjustState(); } void plMetalDevice::SetViewport() { @@ -897,11 +910,12 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) { fCurrentCommandBuffer = fCommandQueue->commandBuffer(); fCurrentCommandBuffer->retain(); + + bool depthNeedsRebuild = fCurrentDrawableDepthTexture == nullptr; + depthNeedsRebuild |= drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || drawable->texture()->height() != fCurrentDrawableDepthTexture->height(); + //cache the depth buffer, we'll just clear it every time. 
- if(fCurrentDrawableDepthTexture == nullptr || - drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || - drawable->texture()->height() != fCurrentDrawableDepthTexture->height() - ) { + if(depthNeedsRebuild) { if(fCurrentDrawableDepthTexture) { fCurrentDrawableDepthTexture->release(); fCurrentFragmentMSAAOutputTexture->release(); @@ -942,6 +956,15 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); } } + + //Do we need to create a unprocessed output texture? + //If the depth needs to be rebuilt - we probably need to rebuild this one too + if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) { + MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); + fCurrentUnprocessedOutputTexture->release(); + fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor); + } + fCurrentDrawable = drawable->retain(); } @@ -1089,6 +1112,10 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; + if( NeedsPostprocessing() ) { + PostprocessIntoDrawable(); + } + fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->enqueue(); fCurrentCommandBuffer->commit(); @@ -1108,6 +1135,48 @@ void plMetalDevice::SubmitCommandBuffer() fClearDrawableDepth = 1.0; } +void plMetalDevice::CreateGammaAdjustState() { + MTL::RenderPipelineDescriptor *gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + MTL::Library* library = fMetalDevice->newDefaultLibrary(); + + gammaDescriptor->setVertexFunction(library->newFunction(NS::MakeConstantString("gammaCorrectVertex"))->autorelease()); + 
gammaDescriptor->setFragmentFunction(library->newFunction(NS::MakeConstantString("gammaCorrectFragment"))->autorelease()); + + library->release(); + + gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + + NS::Error *error; + fGammaAdjustState->release(); + fGammaAdjustState = fMetalDevice->newRenderPipelineState(gammaDescriptor, &error); +} + +void plMetalDevice::PostprocessIntoDrawable() { + + //Gamma adjust + MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); + gammaPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture()); + gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionDontCare); + + MTL::RenderCommandEncoder* gammaAdjustEncoder = fCurrentCommandBuffer->renderCommandEncoder(gammaPassDescriptor); + + gammaAdjustEncoder->setRenderPipelineState(fGammaAdjustState); + + static const float fullFrameCoords[16] = { + //first pair is vertex, second pair is texture + -1, -1, 0, 1, + 1, -1, 1, 1, + -1, 1, 0, 0, + 1, 1, 1, 0 + }; + gammaAdjustEncoder->setVertexBytes(&fullFrameCoords, sizeof(fullFrameCoords), 0); + gammaAdjustEncoder->setFragmentTexture(fCurrentUnprocessedOutputTexture, 0); + gammaAdjustEncoder->setFragmentTexture(fGammaLUTTexture, 1); + gammaAdjustEncoder->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); + gammaAdjustEncoder->endEncoding(); +} + std::size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept { std::size_t value = std::hash()(s.depthFormat); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 4d5a46b2f4..9b9ff552c2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -207,6 +207,8 @@ class plMetalDevice protected: plMetalLinkedPipeline* PipelineState(plMetalPipelineState* pipelineState); + MTL::Texture* fGammaLUTTexture; + private: //these are internal bits for backing the current render pass //private because the functions should be used to keep a consistant @@ -217,6 +219,7 @@ class plMetalDevice MTL::Texture* fCurrentDrawableDepthTexture; MTL::Texture* fCurrentFragmentOutputTexture; + MTL::Texture* fCurrentUnprocessedOutputTexture; MTL::Texture* fCurrentFragmentMSAAOutputTexture; CA::MetalDrawable* fCurrentDrawable; @@ -230,6 +233,13 @@ class plMetalDevice plRenderTarget* fCurrentRenderTarget; MTL::SamplerState* fSamplerStates[4]; + bool NeedsPostprocessing() { + return fGammaLUTTexture != nullptr; + } + void PostprocessIntoDrawable(); + void CreateGammaAdjustState(); + MTL::RenderPipelineState* fGammaAdjustState; + void BeginNewRenderPass(); void ReleaseSamplerStates(); }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 116583b913..239d9c80c3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -807,14 +807,71 @@ void plMetalPipeline::LoadResources() bool plMetalPipeline::SetGamma(float eR, float eG, float eB) { - //FIXME: Implement Gamma - return false; + uint16_t tabR[256]; + uint16_t tabG[256]; + uint16_t tabB[256]; + + tabR[0] = tabG[0] = tabB[0] = 0L; + + plConst(float) kMinE(0.1f); + if( eR > kMinE ) + eR = 1.f / eR; + else + eR = 1.f / kMinE; + if( eG > kMinE ) + eG = 1.f / eG; + else + eG = 1.f / kMinE; + if( eB > kMinE ) + eB = 1.f / eB; + else + eB = 1.f / kMinE; + + int i; + for( i = 1; i < 256; i++ ) + { + float orig = float(i) / 255.f; + + float gamm; + gamm = pow(orig, eR); + gamm *= float(uint16_t(-1)); + tabR[i] = uint16_t(gamm); + + gamm = pow(orig, eG); + 
gamm *= float(uint16_t(-1)); + tabG[i] = uint16_t(gamm); + + gamm = pow(orig, eB); + gamm *= float(uint16_t(-1)); + tabB[i] = uint16_t(gamm); + } + + SetGamma(tabR, tabG, tabB); + + return true; } bool plMetalPipeline::SetGamma(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t *const tabB) { - //FIXME: Implement Gamma - return false; + //allocate a new buffer every time so we don't cause problems with a running render pass + if(fDevice.fGammaLUTTexture) { + fDevice.fGammaLUTTexture->release(); + fDevice.fGammaLUTTexture = nullptr; + } + + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); + texDescriptor->setTextureType(MTL::TextureType1DArray); + texDescriptor->setWidth(256); + texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); + texDescriptor->setArrayLength(3); + + fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); + + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 0, tabR, 256 * sizeof(uint16_t), 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 1, tabG, 256 * sizeof(uint16_t), 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 2, tabB, 256 * sizeof(uint16_t), 0); + + return true; } bool plMetalPipeline::CaptureScreen(plMipmap *dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight) From d44b6b6ae2e36c9bf8147efcbbc29b11360e7fac Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 24 Apr 2022 17:11:33 -0700 Subject: [PATCH 055/165] Small optimization for grass shader --- .../FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal index 145c6c8ba7..bc10488b2a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal @@ -71,15 +71,15 @@ vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], return out; } -fragment float4 ps_GrassShader(vs_GrassInOut in [[stage_in]], - texture2d t0 [[ texture(0) ]]) { +fragment half4 ps_GrassShader(vs_GrassInOut in [[stage_in]], + texture2d t0 [[ texture(0) ]]) { constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, address::repeat); - float4 out = t0.sample(colorSampler, in.texCoord.xy); - out *= in.color; + half4 out = t0.sample(colorSampler, in.texCoord.xy); + out *= half4(in.color); if(out.a <= 0.1) discard_fragment(); return out; From 6a6d5c768a500424fc305c18628c25cca67ad2b0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 24 Apr 2022 18:04:29 -0700 Subject: [PATCH 056/165] Fixes for shadowcasting My optimization broke shadow cast rendering. :( Fixed! --- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 779ee3ce53..69850ac746 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -686,9 +686,9 @@ vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], const float4 vCamPosition = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 1.0)); - float4x4 matrix = uniforms.uvTransforms[0].transform * uniforms.cameraToWorldMatrix; + const float4x4 matrix = uniforms.uvTransforms[0].transform; - out.texCoord1 = (vCamPosition * matrix).xyz; + out.texCoord1 = (matrix * vCamPosition).xyz; out.position = uniforms.projectionMatrix * vCamPosition; @@ -705,7 +705,7 @@ fragment half4 shadowFragmentShader(ShadowCasterInOut in 
[[stage_in]]) fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], - texture2d texture [[ texture(16) ]], + texture2d texture [[ texture(16) ]], constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(BufferIndexShadowCastFragArgBuffer) ]], FragmentShaderArguments layers, constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowAlphaSrc) ]]) @@ -714,7 +714,7 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], if(fragmentUniforms.pointLightCast) { sampleCoords.xy /= sampleCoords.z; } - half4 currentColor = half4(texture.sample(colorSamplers[3], sampleCoords.xy)); + half4 currentColor = texture.sample(colorSamplers[3], sampleCoords.xy); currentColor.rgb *= in.vtxColor.rgb; const float2 LUTCoords = in.texCoord2.xy; From f0bb4dde0c1a565940502149a049c99f4c30e775 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 24 Apr 2022 22:10:02 -0700 Subject: [PATCH 057/165] Adding missing gamma correct shader --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + .../ShaderSrc/GammaCorrection.metal | 85 +++++++++++++++++++ 2 files changed, 86 insertions(+) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 7266ef502e..0aa2e17da3 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -130,6 +130,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrect.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal new file mode 100644 index 0000000000..3bd12b5fe2 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal @@ -0,0 +1,85 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; + +struct GammaVertexIn +{ + float2 position [[ attribute(0) ]]; + float2 texturePosition [[ attribute(0) ]]; +}; + +struct GammaVertexOut +{ + float4 position [[ position ]]; + float2 texturePosition; +}; + +vertex GammaVertexOut gammaCorrectVertex(constant GammaVertexIn *in [[ buffer(0) ]], + uint vertexID [[ vertex_id ]]) +{ + GammaVertexOut out; + // Just pass the position through. We're clearing in NDC space. + out.position = float4(in[vertexID].position, 0.5, 1.0); + out.texturePosition = float2(in[vertexID].texturePosition); + return out; +} + +const constant sampler sourceSampler = sampler(); +const constant sampler lutSampler = sampler( + filter::linear + ); + +fragment half4 gammaCorrectFragment( + GammaVertexOut in [[stage_in]], + texture2d inputTexture [[texture(0)]], + texture1d_array LUT [[texture(1)]] + ) +{ + float4 color = inputTexture.sample(sourceSampler, in.texturePosition); + half4 out = half(1); + out.r = half(float(LUT.sample(lutSampler, color.r, 0).x)/USHRT_MAX); + out.g = half(float(LUT.sample(lutSampler, color.g, 1).x)/USHRT_MAX); + out.b = half(float(LUT.sample(lutSampler, color.b, 2).x)/USHRT_MAX); + return out; +} From d0266bf68d6ed703ec3809d0f4ea34452e490e18 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 24 Apr 2022 22:32:42 -0700 Subject: [PATCH 058/165] Fixing menu going missing sometimes The new manual clear could have its draw culled if backface or frontface culling was set before the clear pass. Disabling culling. I could wind my rect the other way - but the engine is allowed to front face cull. Easiest to just turn it off. 
--- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 892544ceef..97aa97b70b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -216,6 +216,7 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh float clearDepth = 1.0f; CurrentRenderCommandEncoder()->setDepthStencilState(fNoZReadStencilState); + CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); CurrentRenderCommandEncoder()->setVertexBytes(&clearCoords, sizeof(clearCoords), 0); CurrentRenderCommandEncoder()->setFragmentBytes(&halfClearColor, sizeof(halfClearColor), 0); CurrentRenderCommandEncoder()->setFragmentBytes(&clearDepth, sizeof(float), 1); From 6220ac35e04f39f4aaaa6906e828bfd1d8854dc9 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 26 Apr 2022 17:20:42 -0700 Subject: [PATCH 059/165] Fixing crashes on Intel integrated graphics. It was having trouble with the sampleTypes being defined as size_t - which is way too large anyway. uint32_t is probably too large too but going with what works for now. Also doing some other cleanup and fixing unified vs Apple family capability mixups. 
--- Sources/Plasma/Apps/plClient/CMakeLists.txt | 2 +- .../ShaderSrc/FixedPipelineShaders.metal | 48 ++++++++----------- .../pfMetalPipeline/plMetalDevice.cpp | 5 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 2 +- .../pfMetalPipeline/plMetalPipelineState.h | 2 +- 5 files changed, 24 insertions(+), 35 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 0aa2e17da3..a851657e01 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -130,7 +130,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrect.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 69850ac746..1d738adeba 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -170,20 +170,20 @@ constant const uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFla constant const uint32_t miscFlags7 [[ function_constant(FunctionConstantLayerFlags + 6) ]]; constant const uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; -constant const size_t sampleType1 [[ function_constant(FunctionConstantSampleTypes + 0) ]]; -constant const size_t sampleType2 [[ function_constant(FunctionConstantSampleTypes + 1) ]]; -constant const size_t sampleType3 [[ function_constant(FunctionConstantSampleTypes + 2) ]]; -constant const size_t 
sampleType4 [[ function_constant(FunctionConstantSampleTypes + 3) ]]; -constant const size_t sampleType5 [[ function_constant(FunctionConstantSampleTypes + 4) ]]; -constant const size_t sampleType6 [[ function_constant(FunctionConstantSampleTypes + 5) ]]; -constant const size_t sampleType7 [[ function_constant(FunctionConstantSampleTypes + 6) ]]; -constant const size_t sampleType8 [[ function_constant(FunctionConstantSampleTypes + 7) ]]; +constant const uint32_t sampleType1 [[ function_constant(FunctionConstantSampleTypes + 0) ]]; +constant const uint32_t sampleType2 [[ function_constant(FunctionConstantSampleTypes + 1) ]]; +constant const uint32_t sampleType3 [[ function_constant(FunctionConstantSampleTypes + 2) ]]; +constant const uint32_t sampleType4 [[ function_constant(FunctionConstantSampleTypes + 3) ]]; +constant const uint32_t sampleType5 [[ function_constant(FunctionConstantSampleTypes + 4) ]]; +constant const uint32_t sampleType6 [[ function_constant(FunctionConstantSampleTypes + 5) ]]; +constant const uint32_t sampleType7 [[ function_constant(FunctionConstantSampleTypes + 6) ]]; +constant const uint32_t sampleType8 [[ function_constant(FunctionConstantSampleTypes + 7) ]]; #define MAX_BLEND_PASSES 8 constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; constant const uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; -constant const size_t sampleTypes[MAX_BLEND_PASSES] = { sampleType1, sampleType2, sampleType3, sampleType4, sampleType5, sampleType6, sampleType7, sampleType8}; +constant const uint32_t sampleTypes[MAX_BLEND_PASSES] = { sampleType1, sampleType2, sampleType3, sampleType4, sampleType5, 
sampleType6, sampleType7, sampleType8}; constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); typedef struct { @@ -239,32 +239,27 @@ constant constexpr sampler colorSamplers[] = { sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, - address::repeat, - max_anisotropy(16)), + address::repeat), sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, s_address::clamp_to_edge, - t_address::repeat, - max_anisotropy(16)), + t_address::repeat), sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, s_address::repeat, - t_address::clamp_to_edge, - max_anisotropy(16)), + t_address::clamp_to_edge), sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, - address::clamp_to_edge, - max_anisotropy(16)), + address::clamp_to_edge), }; vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - constant float4x4 & blendMatrix1 [[ buffer(BufferIndexBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]], - uint v_id [[vertex_id]]) + constant float4x4 & blendMatrix1 [[ buffer(BufferIndexBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) { ColorInOut out; //we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. @@ -493,11 +488,9 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 verte with a constant. Using an array based lookup was hurting performance by about 1/3rd on Apple Silicon. 
*/ - size_t sampleType = sampleTypes[index]; - sampler colorSampler; - if(sampleType == 0) { - colorSampler = repeatSampler; - } else if(sampleType == 1) { + const uint32_t sampleType = sampleTypes[index]; + sampler colorSampler = repeatSampler; + if(sampleType == 1) { colorSampler = clampRepeatSampler; } else if(sampleType == 2) { colorSampler = repeatClampSampler; @@ -533,8 +526,6 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], */ if (!(passCount==1 && sourceTypes[0] == PassTypeColor)) { - half4 color; - /* Note: For loop should be unrolled by the compiler, but it is very sensitive. Always use size_t for the loop interator type. @@ -543,7 +534,7 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], float3 sampleCoord = (&in.texCoord1)[layer]; - color = fragmentShaderArgs.sampleLayer(layer, in.vtxColor, sourceTypes[layer], sampleCoord); + half4 color = fragmentShaderArgs.sampleLayer(layer, in.vtxColor, sourceTypes[layer], sampleCoord); if(layer==0) { blendFirst(color, currentColor, blendModes[layer]); @@ -679,8 +670,7 @@ constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t b } vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - uint v_id [[vertex_id]]) + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]]) { ShadowCasterInOut out; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 97aa97b70b..2753b5b154 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -839,10 +839,9 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* im MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), img->GetHeight(), supportsMipMap); 
descriptor->setUsage(MTL::TextureUsageShaderRead); //if device has unified memory, set storage mode to shared - if(fMetalDevice->hasUnifiedMemory()) { + if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { descriptor->setStorageMode(MTL::StorageModeShared); } - descriptor->setUsage(MTL::TextureUsageShaderRead); //Metal gets mad if we set this with 0, only set it if we know there are mipmaps if(supportsMipMap) { descriptor->setMipmapLevelCount(tRef->fLevels + 1); @@ -863,7 +862,7 @@ void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plCubic } descriptor->setUsage(MTL::TextureUsageShaderRead); //if device has unified memory, set storage mode to shared - if(fMetalDevice->hasUnifiedMemory()) { + if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { descriptor->setStorageMode(MTL::StorageModeShared); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 30083ce875..7eb1e3742c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -104,7 +104,7 @@ void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstan constants->setConstantValues(&fPassDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); constants->setConstantValues(&fPassDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); constants->setConstantValues(&fPassDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); - constants->setConstantValues(&fPassDescription.sampleTypes, MTL::DataTypeULong, NS::Range(FunctionConstantSampleTypes, 8)); + constants->setConstantValues(&fPassDescription.sampleTypes, MTL::DataTypeUInt, NS::Range(FunctionConstantSampleTypes, 8)); } size_t plMetalMaterialPassPipelineState::GetHash() const { diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 3c418fc80b..cf96edaa3f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -116,7 +116,7 @@ struct plMetalMaterialPassDescription { uint8_t passTypes[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; - size_t sampleTypes[8]; + uint32_t sampleTypes[8]; uint8_t numLayers; bool operator==(const plMetalMaterialPassDescription &p) const { From 603e9ac93bae71c4ba5e1f776f0f094e933b7823 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 26 Apr 2022 22:46:43 -0700 Subject: [PATCH 060/165] Fixing postprocess texture options That should be LoadActionDontCare, and StoreActionStore. I had the opposite. Whoops! --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 2753b5b154..8e5602a884 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -961,6 +961,7 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) //If the depth needs to be rebuilt - we probably need to rebuild this one too if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) { MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); + mainPassDescriptor->setStorageMode(MTL::StorageModePrivate); fCurrentUnprocessedOutputTexture->release(); fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor); } @@ -1155,9 +1156,9 @@ void 
plMetalDevice::PostprocessIntoDrawable() { //Gamma adjust MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); - gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); + gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionDontCare); gammaPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture()); - gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionDontCare); + gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); MTL::RenderCommandEncoder* gammaAdjustEncoder = fCurrentCommandBuffer->renderCommandEncoder(gammaPassDescriptor); From afdee69dca107b0f47533682073360dbbe170da1 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 10 May 2022 21:14:34 -0700 Subject: [PATCH 061/165] Fixing render glitch due to sampleType length mixup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bitten by this again. There’s got to be a better way. Also dropping the size down to uint8_t. 
--- .../ShaderSrc/FixedPipelineShaders.metal | 20 +++++++++---------- .../plMetalMaterialShaderRef.cpp | 2 +- .../plMetalMaterialShaderRef.h | 2 +- .../pfMetalPipeline/plMetalPipeline.cpp | 2 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 2 +- .../pfMetalPipeline/plMetalPipelineState.h | 2 +- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 1d738adeba..6c79f5b50f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -170,20 +170,20 @@ constant const uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFla constant const uint32_t miscFlags7 [[ function_constant(FunctionConstantLayerFlags + 6) ]]; constant const uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; -constant const uint32_t sampleType1 [[ function_constant(FunctionConstantSampleTypes + 0) ]]; -constant const uint32_t sampleType2 [[ function_constant(FunctionConstantSampleTypes + 1) ]]; -constant const uint32_t sampleType3 [[ function_constant(FunctionConstantSampleTypes + 2) ]]; -constant const uint32_t sampleType4 [[ function_constant(FunctionConstantSampleTypes + 3) ]]; -constant const uint32_t sampleType5 [[ function_constant(FunctionConstantSampleTypes + 4) ]]; -constant const uint32_t sampleType6 [[ function_constant(FunctionConstantSampleTypes + 5) ]]; -constant const uint32_t sampleType7 [[ function_constant(FunctionConstantSampleTypes + 6) ]]; -constant const uint32_t sampleType8 [[ function_constant(FunctionConstantSampleTypes + 7) ]]; +constant const uint8_t sampleType1 [[ function_constant(FunctionConstantSampleTypes + 0) ]]; +constant const uint8_t sampleType2 [[ function_constant(FunctionConstantSampleTypes + 1) ]]; +constant const uint8_t sampleType3 
[[ function_constant(FunctionConstantSampleTypes + 2) ]]; +constant const uint8_t sampleType4 [[ function_constant(FunctionConstantSampleTypes + 3) ]]; +constant const uint8_t sampleType5 [[ function_constant(FunctionConstantSampleTypes + 4) ]]; +constant const uint8_t sampleType6 [[ function_constant(FunctionConstantSampleTypes + 5) ]]; +constant const uint8_t sampleType7 [[ function_constant(FunctionConstantSampleTypes + 6) ]]; +constant const uint8_t sampleType8 [[ function_constant(FunctionConstantSampleTypes + 7) ]]; #define MAX_BLEND_PASSES 8 constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; constant const uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; -constant const uint32_t sampleTypes[MAX_BLEND_PASSES] = { sampleType1, sampleType2, sampleType3, sampleType4, sampleType5, sampleType6, sampleType7, sampleType8}; +constant const uint8_t sampleTypes[MAX_BLEND_PASSES] = { sampleType1, sampleType2, sampleType3, sampleType4, sampleType5, sampleType6, sampleType7, sampleType8}; constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); typedef struct { @@ -488,7 +488,7 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 verte with a constant. Using an array based lookup was hurting performance by about 1/3rd on Apple Silicon. 
*/ - const uint32_t sampleType = sampleTypes[index]; + const uint8_t sampleType = sampleTypes[index]; sampler colorSampler = repeatSampler; if(sampleType == 1) { colorSampler = clampRepeatSampler; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 86e7875d5f..96d501de19 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -577,7 +577,7 @@ void plMetalMaterialShaderRef::GetMiscFlagArray(uint32_t *array, uint8_t pass) { } } -void plMetalMaterialShaderRef::GetSampleTypeArray(size_t *array, uint8_t pass) { +void plMetalMaterialShaderRef::GetSampleTypeArray(uint8_t *array, uint8_t pass) { memset(array, 0, sizeof(uint8_t) * 8); uint16_t currNumLayers = fPassLengths[pass]; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index d97ba131f5..6b19d39c2c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -96,7 +96,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void GetSourceArray(uint8_t *array, uint8_t pass); void GetBlendFlagArray(uint32_t *array, uint8_t pass); void GetMiscFlagArray(uint32_t *array, uint8_t pass); - void GetSampleTypeArray(size_t *array, uint8_t pass); + void GetSampleTypeArray(uint8_t *array, uint8_t pass); private: void ILoopOverLayers(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 239d9c80c3..985f7ea0fa 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1558,7 +1558,7 @@ bool 
plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons uint8_t sources[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; - size_t sampleTypes[8]; + uint8_t sampleTypes[8]; memset(sources, 0, sizeof(sources)); memset(blendModes, 0, sizeof(blendModes)); memset(miscFlags, 0, sizeof(miscFlags)); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 7eb1e3742c..199cddfd93 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -104,7 +104,7 @@ void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstan constants->setConstantValues(&fPassDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); constants->setConstantValues(&fPassDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); constants->setConstantValues(&fPassDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); - constants->setConstantValues(&fPassDescription.sampleTypes, MTL::DataTypeUInt, NS::Range(FunctionConstantSampleTypes, 8)); + constants->setConstantValues(&fPassDescription.sampleTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSampleTypes, 8)); } size_t plMetalMaterialPassPipelineState::GetHash() const { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index cf96edaa3f..d152f67c54 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -116,7 +116,7 @@ struct plMetalMaterialPassDescription { uint8_t passTypes[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; - uint32_t sampleTypes[8]; + uint8_t sampleTypes[8]; uint8_t numLayers; bool operator==(const plMetalMaterialPassDescription &p) 
const { From 6f1671ae56045f5e3487677b6b69babfb187a371 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 19 May 2022 20:26:04 -0700 Subject: [PATCH 062/165] Re-creating plate manager when reloading device This allows the intro movie to properly resize (which relies on the plate manager getting re-created.) --- .../pfMetalPipeline/plMetalPipeline.cpp | 37 ++++++++++++++++--- .../pfMetalPipeline/plMetalPipeline.h | 3 ++ .../pfMetalPipeline/plMetalPlateManager.cpp | 16 +++++++- .../pfMetalPipeline/plMetalPlateManager.h | 1 + 4 files changed, 50 insertions(+), 7 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 985f7ea0fa..1d1d6544f0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -204,13 +204,12 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); fDevice.SetMSAASampleCount(fInitialPipeParams.AntiAliasingAmount); - fPlateMgr = new plMetalPlateManager(this); - fCurrentRenderPassUniforms = (VertexUniforms *) calloc(sizeof(VertexUniforms), sizeof(char)); - //FIXME: Add ICreateDynDeviceObjects like DX // RenderTarget pools are shared for our shadow generation algorithm. // Different sizes for different resolutions. 
+ ICreateDeviceObjects(); + ICreateDynDeviceObjects(); IMakeRenderTargetPools(); } @@ -222,6 +221,10 @@ plMetalPipeline::~plMetalPipeline() } } +void plMetalPipeline::ICreateDeviceObjects() { + fPlateMgr = new plMetalPlateManager(this); +} + bool plMetalPipeline::PreRender(plDrawable *drawable, std::vector &visList, plVisMgr *visMgr) { plDrawableSpans *ds = plDrawableSpans::ConvertNoRef(drawable); @@ -731,7 +734,7 @@ void plMetalPipeline::Resize(uint32_t width, uint32_t height) plViewTransform resetTransform = GetViewTransform(); // Destroy old - //IReleaseDeviceObjects(); + IReleaseDeviceObjects(); IReleaseDynDeviceObjects(); // Reset width and height @@ -748,6 +751,8 @@ void plMetalPipeline::Resize(uint32_t width, uint32_t height) // Just for debug hsStatusMessage( "Recreating the pipeline...\n" ); } + + ICreateDeviceObjects(); // Restore states SetViewTransform(resetTransform); @@ -763,6 +768,15 @@ void plMetalPipeline::Resize(uint32_t width, uint32_t height) plgDispatch::MsgSend(clean); } + +void plMetalPipeline::IReleaseDeviceObjects() +{ + IReleaseDynDeviceObjects(); + + delete fPlateMgr; + fPlateMgr = nullptr; +} + void plMetalPipeline::LoadResources() { hsStatusMessageF("Begin Device Reload t=%f",hsTimer::GetSeconds()); @@ -774,7 +788,8 @@ void plMetalPipeline::LoadResources() if (plMetalPlateManager* pm = static_cast(fPlateMgr)) pm->IReleaseGeometry(); - + + IReleaseDynamicBuffers(); IReleaseAvRTPool(); // Create all RenderTargets @@ -3005,12 +3020,22 @@ void plMetalPipeline::IReleaseDynDeviceObjects() // The shared dynamic vertex buffers used by things like objects skinned on CPU, or // particle systems. - //IReleaseDynamicBuffers(); + IReleaseDynamicBuffers(); //IReleaseAvRTPool(); IReleaseRenderTargetPools(); } +// IReleaseDynamicBuffers ///////////////////////////////////////////////// +// Release everything we've created in POOL_DEFAULT. +// This is called on shutdown or when we lose the device. Search for D3DERR_DEVICELOST. 
+void plMetalPipeline::IReleaseDynamicBuffers() +{ + // PlateMgr has a POOL_DEFAULT vertex buffer for drawing quads. + if (plMetalPlateManager* pm = static_cast(fPlateMgr)) + pm->IReleaseGeometry(); +} + // IReleaseRenderTargetPools ////////////////////////////////////////////////// // Free up all resources assosiated with our pools of rendertargets of varying // sizes. Primary user of these pools is the shadow generation. diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 9f4bb7f314..e7173dda04 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -185,8 +185,11 @@ class plMetalPipeline : public pl3DPipeline float uOff, float vOff, plMipmap *tex); void IClearShadowSlaves(); + void ICreateDeviceObjects(); void IReleaseDynDeviceObjects(); bool ICreateDynDeviceObjects(); + void IReleaseDynamicBuffers(); + void IReleaseDeviceObjects(); bool IIsViewLeftHanded(); void ISetCullMode(bool flip = false); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 737ce4088f..2cbf4f3d79 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -131,7 +131,11 @@ void plMetalPlateManager::encodeVertexBuffer(MTL::RenderCommandEncoder *encoder) void plMetalPlateManager::IReleaseGeometry() { - //fVtxBuffer->release(); + if (fVtxBuffer) + { + fVtxBuffer->release(); + fVtxBuffer = nullptr; + } } void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) { @@ -139,9 +143,19 @@ void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) { plPlate* plate = nullptr; for (plate = fPlates; plate != nullptr; plate = plate->GetNext()) { + printf("begginning plate draw\n"); if (plate->IsVisible()) { pipeline->IDrawPlate(plate); + 
printf("drawing plate\n"); + } else { + printf("skipping plate\n"); } + printf("ending plate draw\n"); } } +plMetalPlateManager::~plMetalPlateManager() +{ + IReleaseGeometry(); +} + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index f26df1e49b..ffd956df22 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -61,6 +61,7 @@ class plMetalPlateManager : public plPlateManager void IReleaseGeometry(); MTL::RenderPipelineState *fPlateRenderPipelineState; void encodeVertexBuffer(MTL::RenderCommandEncoder *encoder); + ~plMetalPlateManager(); private: struct plateVertexBuffer { hsPoint2 vertices[4]; From 1ed5e18e8f99b3f2c237fa625c3dffe1591aeccf Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 19 May 2022 21:40:46 -0700 Subject: [PATCH 063/165] Removing logging MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit That wasn’t supposed to be in the last commit --- .../FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 2cbf4f3d79..14cc510547 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -143,14 +143,9 @@ void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) { plPlate* plate = nullptr; for (plate = fPlates; plate != nullptr; plate = plate->GetNext()) { - printf("begginning plate draw\n"); if (plate->IsVisible()) { pipeline->IDrawPlate(plate); - printf("drawing plate\n"); - } else { - printf("skipping plate\n"); } - printf("ending plate draw\n"); } } From 0d680e31d375a696b15bb62fd637fc49f6fde48a Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: 
Thu, 19 May 2022 21:42:12 -0700 Subject: [PATCH 064/165] Fixing device selection This should align the framebuffer device and the render device on dual GPU Intel Macbook Pros. Previously it would try to draw to an Intel IGP framebuffer while rendering on the discrete card. The way this is implemented is a bit of a hack. The Mac client app probably needs a deeper refactoring. --- .../Plasma/Apps/plClient/Mac-Cocoa/main.mm | 46 ++++++++++--------- .../pfMetalPipeline/plMetalPipeline.cpp | 2 +- .../pfMetalPipeline/plMetalPipeline.h | 2 +- 3 files changed, 27 insertions(+), 23 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index 64d21f4475..37c4ab4ada 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -470,14 +470,38 @@ - (void)startClient #if PLASMA_PIPELINE_METAL plMetalPipeline *pipeline = (plMetalPipeline *)gClient->GetPipeline(); - pipeline->currentDrawableCallback = [self] { + pipeline->currentDrawableCallback = [self] (MTL::Device* device) { id< CAMetalDrawable > drawable; + id metalDevice = (__bridge id)device; + if(((CAMetalLayer *) _renderLayer).device != metalDevice) { + ((CAMetalLayer *) _renderLayer).device = metalDevice; + dispatch_async(dispatch_get_main_queue(), ^{ + [self updateWindowTitle]; + }); + } drawable = [((CAMetalLayer *) _renderLayer) nextDrawable]; CA::MetalDrawable * mtlDrawable = ( __bridge CA::MetalDrawable* ) drawable; mtlDrawable->retain(); return mtlDrawable; }; + if (!gClient) { + exit(0); + } + + self.eventMonitor = [[PLSKeyboardEventMonitor alloc] initWithView:self.window.contentView + inputManager:&gClient]; + ((PLSView*)self.window.contentView).inputManager = gClient->GetInputManager(); + [self.window makeFirstResponder:self.window.contentView]; + + // Main loop + if (gClient && !gClient->GetDone()) { + [self startRunLoop]; + } +} + +- (void)updateWindowTitle +{ NSString *productTitle = 
[NSString stringWithCString:plProduct::LongName().c_str() encoding:NSUTF8StringEncoding]; id device = ((CAMetalLayer *) self.window.contentView.layer).device; #ifdef HS_DEBUGGING @@ -496,26 +520,6 @@ - (void)startClient #else [self.window setTitle:[NSString stringWithCString:plProduct::LongName().c_str() encoding:NSUTF8StringEncoding]]; #endif - - if (!gClient) { - exit(0); - } - - self.eventMonitor = [[PLSKeyboardEventMonitor alloc] initWithView:self.window.contentView - inputManager:&gClient]; - ((PLSView*)self.window.contentView).inputManager = gClient->GetInputManager(); - [self.window makeFirstResponder:self.window.contentView]; - - // Main loop - if (gClient && !gClient->GetDone()) { - [self startRunLoop]; - } -} - -- (void)updateWindowTitle -{ - NSString* productTitle = [NSString stringWithSTString:plProduct::LongName()]; - [self.window setTitle:productTitle]; } - (NSApplicationTerminateReply)applicationShouldTerminate:(NSApplication*)sender diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 1d1d6544f0..b550d907a9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -632,7 +632,7 @@ bool plMetalPipeline::BeginRender() IPreprocessShadows(); IPreprocessAvatarTextures(); - CA::MetalDrawable *drawable = currentDrawableCallback(); + CA::MetalDrawable *drawable = currentDrawableCallback(fDevice.fMetalDevice); if(!drawable) { fCurrentPool->release(); return false; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index e7173dda04..0c6d318572 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -85,7 +85,7 @@ class plMetalPipeline : public pl3DPipeline { public: //The actual client should set this callback so we 
can retrieve drawables from the window server - std::function currentDrawableCallback; + std::function currentDrawableCallback; //caching the frag function here so that the shader compiler can quickly access it MTL::Function* fFragFunction; From fb8a12f9d4af32239ed7f4649f2ab8e309d66f36 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 29 May 2022 16:09:48 -0700 Subject: [PATCH 065/165] Fixing water on Nvidia/MSL/Fast Math MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The water vertex shader needs to square as part of its reference math, but the value it needs to square can sometimes be negative. HLSL usually just does the “right” thing but MSL (and GLSL) need more direction and can treat squaring or square rooting a negative as undefined behavior. Taking the absolute value first is safe, since we know that a square will always produce a positive. Long term - this HLSL code was ported over directly. It’s possible that this code could be replaced by some other Metal functions like reflect. More research is required. 
--- .../Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index a6ddea4b1d..c4ccd6de0c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -350,7 +350,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], float3 F = uniforms.EnvAdjust.xyz; float G = uniforms.EnvAdjust.w; //METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt - float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(dot(D.xyz, F.xyz), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) + float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(abs(dot(D.xyz, F.xyz)), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) } From 138690b79a980420f08a62a5841910143216bb85 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 29 May 2022 23:17:41 -0700 Subject: [PATCH 066/165] =?UTF-8?q?Updating=20=E2=80=9Cfixed=E2=80=9D=20sh?= =?UTF-8?q?aders=20to=20remove=20const=20samplers?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was supplemented by the samplers that are set as pipeline resources to support configurable sampling. 
--- .../ShaderSrc/FixedPipelineShaders.metal | 28 ++++--------------- 1 file changed, 5 insertions(+), 23 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 6c79f5b50f..23ee67733a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -235,28 +235,6 @@ typedef struct float3 texCoord1; } ShadowCasterInOut; -constant constexpr sampler colorSamplers[] = { - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::clamp_to_edge, - t_address::repeat), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - s_address::repeat, - t_address::clamp_to_edge), - sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::clamp_to_edge), - -}; - vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], constant float4x4 & blendMatrix1 [[ buffer(BufferIndexBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) @@ -704,7 +682,11 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], if(fragmentUniforms.pointLightCast) { sampleCoords.xy /= sampleCoords.z; } - half4 currentColor = texture.sample(colorSamplers[3], sampleCoords.xy); + const sampler colorSample = sampler(mip_filter::linear, + mag_filter::linear, + min_filter::linear, + address::clamp_to_edge); + half4 currentColor = texture.sample(colorSample, sampleCoords.xy); currentColor.rgb *= in.vtxColor.rgb; const float2 LUTCoords = in.texCoord2.xy; From 6e7243acf87eda7fb52cf636c5fe015f607bc571 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 16 Jun 2022 23:06:43 -0700 Subject: [PATCH 067/165] Moving rendering
to main thread Also adding run loop to take care of intro movie not running on main thread, and honoring full screen setting --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index b550d907a9..ca9d3e7d5b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -719,7 +719,7 @@ void plMetalPipeline::RenderScreenElements() { plProfile_EndTiming(Reset); } -bool plMetalPipeline::IsFullScreen() const { return false; } +bool plMetalPipeline::IsFullScreen() const { return fDefaultPipeParams.Windowed; } void plMetalPipeline::Resize(uint32_t width, uint32_t height) { From 160cda058d599f6eb3b594e74b2b332a0d5effb7 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 25 Jun 2022 18:23:53 -0700 Subject: [PATCH 068/165] Fixes for fog The previous fog algorithm was very wrong, but also covering for a problem where fog was being applied to layers it should not have been --- .../ShaderSrc/FixedPipelineShaders.metal | 48 +++++++++++++++++-- .../pfMetalPipeline/plMetalPipeline.cpp | 5 ++ .../pfMetalPipeline/plMetalPipelineState.cpp | 14 ++---- 3 files changed, 54 insertions(+), 13 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 23ee67733a..88341f7cd3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -524,7 +524,7 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], currentColor = half4(in.vtxColor.rgb, 1.0h) * currentColor; } - currentColor.rgb = mix(currentColor.rgb, in.fogColor.rgb, (1.0h - 
clamp((float)in.fogColor.a, 0.0f, 1.0f)) * (float)currentColor.a); + currentColor.rgb = mix(in.fogColor.rgb, currentColor.rgb, (float)clamp(in.fogColor.a, 0.0h, 1.0h)); if (currentColor.a < fragmentShaderArgs.bufferedUniforms->alphaThreshold) { discard_fragment(); } @@ -672,6 +672,41 @@ fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]]) } + constant float2 poissonDisk[16] = { + float2( -0.94201624, -0.39906216 ), + float2( 0.94558609, -0.76890725 ), + float2( -0.094184101, -0.92938870 ), + float2( 0.34495938, 0.29387760 ), + float2( -0.91588581, 0.45771432 ), + float2( -0.81544232, -0.87912464 ), + float2( -0.38277543, 0.27676845 ), + float2( 0.97484398, 0.75648379 ), + float2( 0.44323325, -0.97511554 ), + float2( 0.53742981, -0.47373420 ), + float2( -0.26496911, -0.41893023 ), + float2( 0.79197514, 0.19090188 ), + float2( -0.24188840, 0.99706507 ), + float2( -0.81409955, 0.91437590 ), + float2( 0.19984126, 0.78641367 ), + float2( 0.14383161, -0.14100790 ) + }; + + + const float rand(float3 co){ + //since opengl es only garantees that mediump will be 10 bits, we need to try and + //keep the numbers low. 
The actual constants are mostly arbilitary chosen with the + //goal to give different weightings to the first or seccond element + + float3 product = float3( sin( dot(co, float3(0.129898,0.78233, 0.129898))), + sin( dot(co, float3(0.689898,0.23233, 0.689898))), + sin( dot(co, float3(0.434198,0.51833, 0.434198))) ); + + + float3 weighting = float3(4.37585453723, 2.465973, 3.18438); + + return fract(dot(weighting, product)); + } + fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], texture2d texture [[ texture(16) ]], constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(BufferIndexShadowCastFragArgBuffer) ]], @@ -682,11 +717,16 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], if(fragmentUniforms.pointLightCast) { sampleCoords.xy /= sampleCoords.z; } - const sampler colorSample = sampler(mip_filter::linear, - mag_filter::linear, + const sampler colorSample = sampler( mag_filter::linear, min_filter::linear, address::clamp_to_edge); - half4 currentColor = texture.sample(colorSample, sampleCoords.xy); + + half4 currentColor = 0.0; + for (int i=0;i<4;i++){ + int index = int(16.0*rand(floor(in.position.xyz*1000.0) + i))%16; + currentColor += 0.25 * texture.sample(colorSample, sampleCoords.xy + poissonDisk[index]/700.0); + } + //half4 currentColor = texture.sample(colorSample, sampleCoords.xy); currentColor.rgb *= in.vtxColor.rgb; const float2 LUTCoords = in.texCoord2.xy; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index ca9d3e7d5b..5ebd275a63 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2338,6 +2338,11 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye fCurrentRenderPassUniforms->fogColor = { 0.0, 0.0, 0.0 }; break; } + + + if( currLayer->GetBlendFlags() & (hsGMatState::kBlendAdd 
| hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha) ) { + fCurrentRenderPassUniforms->fogColor = { 0.0, 0.0, 0.0 }; + } } void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 199cddfd93..9727658eef 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -174,16 +174,11 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); } else { //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); - descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha);; } + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); } else { if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); @@ -249,11 +244,12 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode case 0: //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); + descriptor->setAlphaBlendOperation(MTL::BlendOperationAdd); 
//printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); - descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); /*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); From e183e50e5581972b12a13614b9ba787ad3b48a26 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 25 Jun 2022 21:40:05 -0700 Subject: [PATCH 069/165] Moving depth buffer to memoryless on Apple Silicon --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 8e5602a884..17b18f8f4b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -937,6 +937,11 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) depthTextureDescriptor->setSampleCount(fSampleCount); depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); depthTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); + if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { + depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); + } else { + depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); + } fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); MTL::TextureDescriptor *msaaColorTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), From f84ff03612e9c334d7a9be9b504ddedae135f3f1 
Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 25 Jun 2022 22:27:56 -0700 Subject: [PATCH 070/165] Fixing god rays in Kadish Flat color base layers were blending against themselves --- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 88341f7cd3..30f8a2f6aa 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -515,7 +515,10 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], half4 color = fragmentShaderArgs.sampleLayer(layer, in.vtxColor, sourceTypes[layer], sampleCoord); if(layer==0) { - blendFirst(color, currentColor, blendModes[layer]); + //only blend if there is a texture to blend into + if(sourceTypes[0] != PassTypeColor) { + blendFirst(color, currentColor, blendModes[layer]); + } } else { blend(color, currentColor, blendModes[layer]); } From 1d1f7002cb8915392e8b8d563d9a6147ed83008f Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 26 Jun 2022 11:31:21 -0700 Subject: [PATCH 071/165] Fixes for problematic assets in Kadish MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A cube map near or in the vault in Kadish has inconsistent cubic mipmap face sizes, and will cause Metal to crash. This change will cause Metal to manually compute mipmap sizes, and then load the data anyway. This is what DX does. The mipmaps affected don’t seem to be intended to be used. 
--- .../pfMetalPipeline/plMetalDevice.cpp | 27 +++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 17b18f8f4b..2db035b46b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -769,13 +769,36 @@ uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMi void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice) { if (img->IsCompressed()) { + /* + Some cubic assets have inconsistant mipmap sizes between their faces. + The DX pipeline maintains seperate structures noting the expected + mipmap sizes, and ignores the actual face sizes. This hack + makes the Metal pipeline ignore the actual face sizes and behave + as if all face sizes are equivelent to the first face. It does this + by computing the expected mipmap sizes on the fly. + This hack could be disabled if cube maps in the assets were + fixed to be consistant. 
+ */ +#define HACK_LEVEL_SIZE 1 + +#if HACK_LEVEL_SIZE + uint width = tRef->fTexture->width(); + uint height = tRef->fTexture->height(); +#endif for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { img->SetCurrLevel(lvl); +#if HACK_LEVEL_SIZE + uint levelWidth = (width / exp2(lvl)); + uint levelHeight = (height / exp2(lvl)); +#else + uint levelWidth = img->GetCurrWidth(); + uint levelHeight = img->GetCurrHeight(); +#endif - switch (img->fDirectXInfo.fCompressionType) { + switch (img->fDirectXInfo.fCompressionType) { case plBitmap::DirectXInfo::kDXT1: - tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 2, 0); + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, levelWidth, levelHeight), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), levelWidth * 2, 0); break; case plBitmap::DirectXInfo::kDXT5: tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); From e0e69c82a0e38ec2a59108384f6bde75e973634e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 26 Jun 2022 12:13:41 -0700 Subject: [PATCH 072/165] Fixing fog color getting mingled with projections Calc lighting was done before projections were applied. This was allowing fog (which is part of lighting calc in Metal) to be enabled for projection layers. This was most visible in Kadish. This code is kind of messy, but has been through several improvement passes already. 
--- .../pfMetalPipeline/plMetalPipeline.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 5ebd275a63..f0a048d17f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1458,7 +1458,6 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons //plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); plLayerInterface *lay = material->GetLayer(mRef->GetPassIndex(pass)); - ICalcLighting(mRef, lay, currSpan); hsGMatState s; s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); @@ -1491,6 +1490,10 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons //Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. if(lay->GetVertexShader() && allowShaders) { + + lay = IPushOverBaseLayer(lay); + lay = IPushOverAllLayer(lay); + //pure shader path plShader *vertexShader = lay->GetVertexShader(); plShader *fragShader = lay->GetPixelShader(); @@ -1544,11 +1547,21 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons } fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i + idOffset); + } + lay = IPopOverAllLayer(lay); + lay = IPopOverBaseLayer(lay); } else { //"Fixed" path + /* + To compute correct lighting we need to add the pushover layers. + The actual renderer will do it's own add and remove, so remove the + pushover layer before we get to the actual layer loop. 
+ */ lay = IPushOverBaseLayer(lay); + lay = IPushOverAllLayer(lay); + ICalcLighting(mRef, lay, currSpan); s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); @@ -1579,6 +1592,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons memset(miscFlags, 0, sizeof(miscFlags)); memset(sampleTypes, 0, sizeof(sampleTypes)); + lay = IPopOverAllLayer(lay); lay = IPopOverBaseLayer(lay); if(numActivePiggyBacks==0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) { From c971b25625e94485bb8e2eeefe0220eb23484b56 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 5 Jul 2022 19:34:41 -0700 Subject: [PATCH 073/165] Moving opacity check after lighting calc This fixes steam in Gira and probably other things. This function is still kind of a mess and needs more cleanup. The DX version is also messy though. --- .../pfMetalPipeline/plMetalPipeline.cpp | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index f0a048d17f..20b7af31f1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1461,19 +1461,6 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons hsGMatState s; s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); - - /* - If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer. - This can happen with some models like the fire marbles in the neighborhood that have some models - for physics only, and then can block other rendering in the Z buffer. - DX pipeline does this in ILoopOverLayers. 
- */ - if( (s.fBlendFlags & hsGMatState::kBlendAlpha) - &&lay->GetOpacity() <= 0 - &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { - - return false; - } IHandleZMode(s); IHandleBlendMode(s); @@ -1565,6 +1552,19 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + /* + If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer. + This can happen with some models like the fire marbles in the neighborhood that have some models + for physics only, and then can block other rendering in the Z buffer. + DX pipeline does this in ILoopOverLayers. + */ + if( (s.fBlendFlags & hsGMatState::kBlendAlpha) + &&lay->GetOpacity() <= 0 + &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { + + return false; + } + if (s.fBlendFlags & hsGMatState::kBlendInvertVtxAlpha) fCurrentRenderPassUniforms->invVtxAlpha = true; else From 1de1a589ba9872a775ffb98ab325222d6e3c1133 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 5 Jul 2022 23:51:45 -0700 Subject: [PATCH 074/165] Fixing infinite loop bug created by early return Have to pop the layers. Code is still messy and needs a better solution. 
--- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 20b7af31f1..09002152bd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1562,6 +1562,11 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons &&lay->GetOpacity() <= 0 &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { + //FIXME: All these popping of layers in the return sections is getting ugly + + lay = IPopOverAllLayer(lay); + lay = IPopOverBaseLayer(lay); + return false; } From c25ee81fb039409170058b333f28b46b1c96c7d8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 10 Jul 2022 17:06:02 -0700 Subject: [PATCH 075/165] Adding ISetLayer Caching z bias state. Also caching cull mode state. --- .../pfMetalPipeline/plMetalPipeline.cpp | 40 ++++++++++++++----- .../pfMetalPipeline/plMetalPipeline.h | 4 ++ 2 files changed, 35 insertions(+), 9 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 09002152bd..3701e40068 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -156,7 +156,7 @@ bool plRenderTriListFunc::RenderPrims() const -plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), fPipelineState(nullptr), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr) +plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const 
hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), fPipelineState(nullptr), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr), fCurrRenderLayer() { fTextureRefList = nullptr; fVtxBuffRefList = nullptr; @@ -1455,12 +1455,16 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); fCurrLayerIdx = mRef->GetPassIndex(pass); - //plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); plLayerInterface *lay = material->GetLayer(mRef->GetPassIndex(pass)); hsGMatState s; s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); + + if( s.fZFlags & hsGMatState::kZIncLayer ) + ISetLayer(1); + else + ISetLayer(0); IHandleZMode(s); IHandleBlendMode(s); @@ -1474,6 +1478,7 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons ISetCullMode(); } + s = 0; //Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. 
if(lay->GetVertexShader() && allowShaders) { @@ -2066,8 +2071,7 @@ void plMetalPipeline::IHandleZMode(hsGMatState flags) switch (flags.fZFlags & hsGMatState::kZMask) { case hsGMatState::kZClearZ: - //FIXME: Clear should actually clear the Z target - newDepthState = fDevice.fNoZReadStencilState; + fDevice.Clear(false, {0.0f, 0.0f, 0.0f, 0.0f}, true, 0.0); break; case hsGMatState::kZNoZRead: newDepthState = fDevice.fNoZReadStencilState; @@ -2094,10 +2098,26 @@ void plMetalPipeline::IHandleZMode(hsGMatState flags) fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(newDepthState); fCurrentDepthStencilState = newDepthState; } +} - if (flags.fZFlags & hsGMatState::kZIncLayer) { - fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, -2.0, -2.0); - } else { +//// ISetLayer //////////////////////////////////////////////////////////////// +// Sets whether we're rendering a base layer or upper layer. Upper layer has +// a Z bias to avoid Z fighting. +void plMetalPipeline::ISetLayer( uint32_t lay ) +{ + if( lay ) + { + if( fCurrRenderLayer != lay ) + { + fCurrRenderLayer = lay; + + plCONST(int) kBiasMult = 8; + fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, -kBiasMult, -kBiasMult); + } + } + else if( fCurrRenderLayer != 0 ) + { + fCurrRenderLayer = 0; fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, 0.0, 0.0); } } @@ -4158,8 +4178,10 @@ bool plMetalPipeline::IIsViewLeftHanded() void plMetalPipeline::ISetCullMode(bool flip) { MTL::CullMode newCullMode = !IIsViewLeftHanded() ^ !flip ? 
MTL::CullModeFront : MTL::CullModeBack; - fDevice.CurrentRenderCommandEncoder()->setCullMode(newCullMode); - fCurrentCullMode = newCullMode; + if (fCurrentCullMode != newCullMode) { + fDevice.CurrentRenderCommandEncoder()->setCullMode(newCullMode); + fCurrentCullMode = newCullMode; + } } plMetalDevice* plMetalPipeline::GetMetalDevice() diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 0c6d318572..a9d65ceaa0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -241,6 +241,8 @@ class plMetalPipeline : public pl3DPipeline void ResetMetalStateTracking(); + void ISetLayer( uint32_t lay ); + // Shadows std::vector fRenderTargetPool512; std::vector fRenderTargetPool256; @@ -253,6 +255,8 @@ class plMetalPipeline : public pl3DPipeline std::vector fProjEach; std::vector fProjAll; + uint32_t fCurrRenderLayer; + void PushCurrentLightSources(); void PopCurrentLightSources(); std::vector fLightSourceStack; From 711a6e87ed856744f26de40faa704fffa2595647 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 12 Jul 2022 22:01:00 -0700 Subject: [PATCH 076/165] Fixing state tracking Should fix bugs with water not drawing and cull mode not being tracked properly --- .../pfMetalPipeline/plMetalPipeline.cpp | 84 +++++++++---------- .../pfMetalPipeline/plMetalPipeline.h | 20 +++-- 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 3701e40068..361f4324d2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -323,7 +323,7 @@ void plMetalPipeline::PushRenderRequest(plRenderRequest *req) fView.SetMaxCullNodes(0); } - ResetMetalStateTracking(); + fState.Reset(); } void 
plMetalPipeline::PopRenderRequest(plRenderRequest *req) @@ -336,7 +336,7 @@ void plMetalPipeline::PopRenderRequest(plRenderRequest *req) //it won't be set yet on the new target //in theory we could have a stack of these so when we unwind we //could get the state back. - ResetMetalStateTracking(); + fState.Reset(); hsRefCnt_SafeUnRef(fView.fRenderRequest); fView = fViewStack.top(); @@ -348,7 +348,7 @@ void plMetalPipeline::PopRenderRequest(plRenderRequest *req) plRenderTarget* plMetalPipeline::PopRenderTarget() { pl3DPipeline::PopRenderTarget(); - ResetMetalStateTracking(); + fState.Reset(); } void plMetalPipeline::ClearRenderTarget(plDrawable *d) @@ -370,7 +370,7 @@ void plMetalPipeline::ClearRenderTarget(const hsColorRGBA *col, const float *dep hsColorRGBA clearColor = col ? *col : GetClearColor(); float clearDepth = depth ? *depth : fView.GetClearDepth(); fDevice.Clear(fView.fRenderState & kRenderClearColor, {clearColor.r, clearColor.g, clearColor.b, clearColor.a}, fView.fRenderState & kRenderClearDepth, 1.0); - ResetMetalStateTracking(); + fState.Reset(); } } @@ -613,7 +613,7 @@ bool plMetalPipeline::BeginRender() // offset transform RefreshScreenMatrices(); - ResetMetalStateTracking(); + fState.Reset(); // offset transform RefreshScreenMatrices(); @@ -635,13 +635,11 @@ bool plMetalPipeline::BeginRender() CA::MetalDrawable *drawable = currentDrawableCallback(fDevice.fMetalDevice); if(!drawable) { fCurrentPool->release(); - return false; + return true; } fDevice.CreateNewCommandBuffer(drawable); drawable->release(); } - - fCurrentCullMode = MTL::CullMode(-1); fRenderCnt++; @@ -654,7 +652,7 @@ bool plMetalPipeline::BeginRender() bool plMetalPipeline::EndRender() { bool retVal = false; - ResetMetalStateTracking(); + fState.Reset(); if (--fInSceneDepth == 0) { fDevice.SubmitCommandBuffer(); @@ -1171,9 +1169,9 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, if(!vRef->GetBuffer()) { return; } - if (fCurrentVertexBuffer != 
vRef->GetBuffer()) { + if (fState.fCurrentVertexBuffer != vRef->GetBuffer()) { fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); - fCurrentVertexBuffer = vRef->GetBuffer(); + fState.fCurrentVertexBuffer = vRef->GetBuffer(); } fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); @@ -1220,7 +1218,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, //aux spans will change the current vertex buffer, put ours back fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); - fCurrentVertexBuffer = vRef->GetBuffer(); + fState.fCurrentVertexBuffer = vRef->GetBuffer(); } @@ -1237,7 +1235,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, if( fShadows.size() ) { //if we had to render aux spans, we probably changed the vertex and index buffer //reset those - fCurrentVertexBuffer = vRef->GetBuffer(); + fState.fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); IRenderShadowsOntoSpan(render, &span, material, vRef); @@ -1389,7 +1387,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) return; } fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); - fCurrentVertexBuffer = vRef->GetBuffer(); + fState.fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); plRenderTriListFunc render(&fDevice, 0, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength); @@ -1470,16 +1468,14 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons IHandleBlendMode(s); if (s.fMiscFlags & hsGMatState::kMiscTwoSided) { - if(fCurrentCullMode != MTL::CullModeNone) { + if(fState.fCurrentCullMode != MTL::CullModeNone) { fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); - fCurrentCullMode = MTL::CullModeNone; + fState.fCurrentCullMode = MTL::CullModeNone; } } else { ISetCullMode(); } - s = 0; - //Some build 
passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. if(lay->GetVertexShader() && allowShaders) { @@ -1677,9 +1673,9 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); const MTL::RenderPipelineState *pipelineState = pipeline->pipelineState;*/ - if(fCurrentPipelineState != pipelineState) { + if(fState.fCurrentPipelineState != pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); - fCurrentPipelineState = pipelineState; + fState.fCurrentPipelineState = pipelineState; } } @@ -1920,9 +1916,9 @@ bool plMetalPipeline::ISetShaders(const plMetalVertexBufferRef * vRef, const hsG plShaderID::ID fragmentShaderID = pShader->GetDecl()->GetID(); plMetalDevice::plMetalLinkedPipeline *pipeline = plMetalDynamicMaterialPipelineState(&fDevice, vRef, blendMode.fBlendFlags, vertexShaderID, fragmentShaderID).GetRenderPipelineState(); - if(fCurrentPipelineState != pipeline->pipelineState) { + if(fState.fCurrentPipelineState != pipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipeline->pipelineState); - fCurrentPipelineState = pipeline->pipelineState; + fState.fCurrentPipelineState = pipeline->pipelineState; } if( vShader ) @@ -2094,9 +2090,9 @@ void plMetalPipeline::IHandleZMode(hsGMatState flags) break; } - if(fCurrentDepthStencilState != newDepthState) { + if(fState.fCurrentDepthStencilState != newDepthState) { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(newDepthState); - 
fCurrentDepthStencilState = newDepthState; + fState.fCurrentDepthStencilState = newDepthState; } } @@ -2545,7 +2541,7 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) IHandleZMode(s); IHandleBlendMode(s); fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); - fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState; + fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState; //column major layout simd_float4x4 projMat = matrix_identity_float4x4; @@ -2575,9 +2571,9 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; - if(fCurrentPipelineState != pm->fPlateRenderPipelineState) { + if(fState.fCurrentPipelineState != pm->fPlateRenderPipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); - fCurrentPipelineState = pm->fPlateRenderPipelineState; + fState.fCurrentPipelineState = pm->fPlateRenderPipelineState; } float alpha = material->GetLayer(0)->GetOpacity(); fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&alpha, sizeof(float), 6); @@ -2903,16 +2899,16 @@ void plMetalPipeline::IPreprocessAvatarTextures() if (k == plClothingElement::kLayerBase) { - if(fCurrentPipelineState != baseAvatarRenderState) { + if(fState.fCurrentPipelineState != baseAvatarRenderState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(baseAvatarRenderState); - fCurrentPipelineState = baseAvatarRenderState; + fState.fCurrentPipelineState = baseAvatarRenderState; } } else { - if(fCurrentPipelineState != avatarRenderState) { + if(fState.fCurrentPipelineState != avatarRenderState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(avatarRenderState); - fCurrentPipelineState = avatarRenderState; + fState.fCurrentPipelineState = avatarRenderState; } } fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&tint, sizeof(hsColorRGBA), 0); @@ -3874,18 +3870,19 @@ void 
plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp /// Switch to the vertex buffer we want plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowCasterPipelineState(&fDevice, vRef).GetRenderPipelineState(); - if(fCurrentPipelineState != linkedPipeline->pipelineState) { + if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); - fCurrentPipelineState = linkedPipeline->pipelineState; + fState.fCurrentPipelineState = linkedPipeline->pipelineState; } - if (fCurrentVertexBuffer != vRef->GetBuffer()) { + if (fState.fCurrentVertexBuffer != vRef->GetBuffer()) { fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); - fCurrentVertexBuffer = vRef->GetBuffer(); + fState.fCurrentVertexBuffer = vRef->GetBuffer(); } - fCurrentVertexBuffer = vRef->GetBuffer(); + fState.fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); + fState.fCurrentCullMode = MTL::CullModeNone; fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); uint32_t vStart = span.fVStartIdx; @@ -3952,9 +3949,9 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con } plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); - if(fCurrentPipelineState != linkedPipeline->pipelineState) { + if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); - fCurrentPipelineState = linkedPipeline->pipelineState; + fState.fCurrentPipelineState = linkedPipeline->pipelineState; } int selfShadowNow = span->IsShadowBitSet(fShadows[i]->fIndex); @@ -4028,9 +4025,9 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) // Zbuffering on read-only - if(fCurrentDepthStencilState 
!= fDevice.fNoZWriteStencilState) { + if(fState.fCurrentDepthStencilState != fDevice.fNoZWriteStencilState) { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZWriteStencilState); - fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; + fState.fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; } int numUVSrcs = 2; @@ -4178,9 +4175,9 @@ bool plMetalPipeline::IIsViewLeftHanded() void plMetalPipeline::ISetCullMode(bool flip) { MTL::CullMode newCullMode = !IIsViewLeftHanded() ^ !flip ? MTL::CullModeFront : MTL::CullModeBack; - if (fCurrentCullMode != newCullMode) { + if (fState.fCurrentCullMode != newCullMode) { fDevice.CurrentRenderCommandEncoder()->setCullMode(newCullMode); - fCurrentCullMode = newCullMode; + fState.fCurrentCullMode = newCullMode; } } @@ -4621,9 +4618,10 @@ uint32_t plMetalPipeline::IGetBufferFormatSize( uint8_t format ) const return size; } -void plMetalPipeline::ResetMetalStateTracking() +void plMetalPipeline::plMetalPipelineCurrentState::Reset() { fCurrentPipelineState = nullptr; fCurrentDepthStencilState = nullptr; fCurrentVertexBuffer = nullptr; + fCurrentCullMode.reset(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index a9d65ceaa0..d0283de53b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -49,6 +49,8 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include #include "ShaderTypes.h" +#include + class plIcicle; class plPlate; class plMetalMaterialShaderRef; @@ -164,11 +166,6 @@ class plMetalPipeline : public pl3DPipeline MTL::RenderPipelineState* fPipelineState; VertexUniforms* fCurrentRenderPassUniforms; - //cache to prevent oversetting, Metal won't catch this for us and will encode extra work - const MTL::RenderPipelineState* fCurrentPipelineState; - MTL::Buffer* fCurrentVertexBuffer; - MTL::DepthStencilState* fCurrentDepthStencilState; - void FindFragFunction(); void ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj = false); @@ -217,8 +214,6 @@ class plMetalPipeline : public pl3DPipeline plMetalVertexShader* fVShaderRefList; plMetalFragmentShader* fPShaderRefList; - MTL::CullMode fCurrentCullMode; - bool IPrepShadowCaster(const plShadowCaster* caster); bool IRenderShadowCaster(plShadowSlave* slave); void IPreprocessShadows(); @@ -239,8 +234,6 @@ class plMetalPipeline : public pl3DPipeline void IReleaseRenderTargetPools(); void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); - void ResetMetalStateTracking(); - void ISetLayer( uint32_t lay ); // Shadows @@ -264,6 +257,15 @@ class plMetalPipeline : public pl3DPipeline static plMetalEnumerate enumerator; NS::AutoreleasePool* fCurrentPool; + + struct plMetalPipelineCurrentState { + std::optional fCurrentCullMode; + const MTL::RenderPipelineState* fCurrentPipelineState; + MTL::Buffer* fCurrentVertexBuffer; + MTL::DepthStencilState* fCurrentDepthStencilState; + + void Reset(); + } fState; }; #endif // _plGLPipeline_inc_ From 4ffceed01d86cc300d017c067e2540e257ada23e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 17 Jul 2022 23:18:29 -0700 Subject: [PATCH 077/165] Refactoring pass code Clearing up redundant loops, clarifying code, fixing some latent possible issues with pushover layers not being consistently
applied --- .../ShaderSrc/GammaCorrection.metal | 2 +- .../plMetalMaterialShaderRef.cpp | 217 +++++------------- .../plMetalMaterialShaderRef.h | 18 +- .../pfMetalPipeline/plMetalPipeline.cpp | 70 +----- .../pfMetalPipeline/plMetalPipeline.h | 2 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 35 ++- .../pfMetalPipeline/plMetalPipelineState.h | 27 ++- 7 files changed, 109 insertions(+), 262 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal index 3bd12b5fe2..da2290a278 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal @@ -67,7 +67,7 @@ vertex GammaVertexOut gammaCorrectVertex(constant GammaVertexIn *in [[ buffer(0) const constant sampler sourceSampler = sampler(); const constant sampler lutSampler = sampler( - filter::linear + filter::nearest ); fragment half4 gammaCorrectFragment( diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 96d501de19..d3bb7f3ab3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -61,6 +61,8 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plMetalDevice.h" #include "plMetalPipeline.h" +#include + plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline *pipe) : fPipeline { pipe }, fMaterial { mat }, @@ -125,9 +127,9 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); - if (!texRef->fTexture) { - continue; - } + //if (!texRef->fTexture) { + // continue; + //} assert(i - GetPassIndex(pass) >= 0); EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); @@ -137,81 +139,35 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, BufferIndexFragArgBuffer); } -void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) +void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription* passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) { - //encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, BufferIndexFragArgBuffer); - vertexUniforms->numUVSrcs = fPassLengths[pass]; + std::vector layers = GetLayersForPass(pass); + if(piggyBacks) { - vertexUniforms->numUVSrcs += piggyBacks->size(); + layers.insert(layers.end(), piggyBacks->begin(), piggyBacks->end()); } + vertexUniforms->numUVSrcs = layers.size(); + plMetalFragmentShaderArgumentBuffer uniforms; - IHandleMaterial(GetPassIndex(pass), &uniforms, piggyBacks, + IHandleMaterial(GetPassIndex(pass), passDescription, &uniforms, piggyBacks, [&](plLayerInterface* layer, uint32_t index) { layer = preEncodeTransform(layer, index); IBuildLayerTexture(encoder, index, layer); + + plBitmap* img 
= plBitmap::ConvertNoRef(layer->GetTexture()); + + assert(index - GetPassIndex(pass) >= 0); + EncodeTransform(layer, &vertexUniforms->uvTransforms[index]); + return layer; }, [&](plLayerInterface* layer, uint32_t index) { layer = postEncodeTransform(layer, index); return layer; }); - size_t i = 0; - for (i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { - plLayerInterface* layer = fMaterial->GetLayer(i); - - if (!layer) { - continue; - } - - fPipeline->CheckTextureRef(layer); - - plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); - - if (!img) { - continue; - } - - plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); - - if (!texRef->fTexture) { - continue; - } - - assert(i - GetPassIndex(pass) >= 0); - EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); - } - - if(piggyBacks) { - for (size_t piggybackIndex = 0; piggybackIndex < piggyBacks->size(); piggybackIndex++) { - // Note that we take piggybacks off the end of piggyBacks. 
- plLayerInterface* layer = piggyBacks->at(piggyBacks->size() - 1 - piggybackIndex); - - if (!layer) { - continue; - } - - fPipeline->CheckTextureRef(layer); - - plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); - - if (!img) { - continue; - } - - plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); - - if (!texRef->fTexture) { - continue; - } - - assert(i - GetPassIndex(pass) >= 0); - EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass) + piggybackIndex]); - } - } - encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), BufferIndexFragArgBuffer); } @@ -226,7 +182,6 @@ void plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDes //FIXME: Replace the plate codes path to texturing void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass) { - int32_t numTextures = 0; plLayerInterface* layer = fMaterial->GetLayer(pass); if (!layer) { @@ -251,8 +206,6 @@ void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encode } else if (plMipmap::ConvertNoRef(layer->GetTexture()) != nullptr || plRenderTarget::ConvertNoRef(layer->GetTexture()) != nullptr) { encoder->setFragmentTexture(texRef->fTexture, Texture); } - - numTextures++; } void plMetalMaterialShaderRef::ILoopOverLayers() @@ -262,7 +215,7 @@ void plMetalMaterialShaderRef::ILoopOverLayers() for (j = 0; j < fMaterial->GetNumLayers(); ) { - size_t iCurrMat = j; + size_t currLayer = j; //Create "fast encode" buffers //Fast encode can be used when there are no piggybacks or pushover layers. 
We'll load as much of the @@ -274,7 +227,9 @@ void plMetalMaterialShaderRef::ILoopOverLayers() plMetalFragmentShaderArgumentBuffer *layerBuffer = (plMetalFragmentShaderArgumentBuffer *)argumentBuffer->contents(); - j = IHandleMaterial(iCurrMat, layerBuffer, nullptr, + plMetalFragmentShaderDescription passDescription; + + j = IHandleMaterial(currLayer, &passDescription, layerBuffer, nullptr, [](plLayerInterface* layer, uint32_t index) { return layer; }, @@ -285,19 +240,28 @@ void plMetalMaterialShaderRef::ILoopOverLayers() if (j == -1) break; + passDescription.CacheHash(); + fFragmentShaderDescriptions.push_back(passDescription); + + std::vector layers(j); + pass++; //encode the colors for this pass into our buffer for fast rendering - for(int colorToEncode = 0; colorToEncode < j - iCurrMat; colorToEncode ++) { - IBuildLayerTexture(NULL, colorToEncode, fMaterial->GetLayer(iCurrMat + colorToEncode)); + for(int layerOffset = 0; layerOffset < j - currLayer; layerOffset ++) { + plLayerInterface* layer = fMaterial->GetLayer(currLayer + layerOffset); + layers[layerOffset] = layer; + IBuildLayerTexture(NULL, layerOffset, layer); } + fPasses.push_back(layers); + argumentBuffer->didModifyRange(NS::Range(0, argumentBuffer->length())); fPassArgumentBuffers.push_back(argumentBuffer); - fPassIndices.push_back(iCurrMat); - fPassLengths.push_back(j - iCurrMat); + fPassIndices.push_back(currLayer); + fPassLengths.push_back(j - currLayer); fNumPasses++; #if 0 @@ -320,6 +284,12 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc if (texture != nullptr && encoder) { plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); + if(!deviceTexture) { + //FIXME: Better way to address missing textures than null pointers + encoder->setFragmentTexture(nullptr, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer); + encoder->setFragmentTexture(nullptr, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer); + return; 
+ } hsAssert(offsetFromRootLayer <= 8, "Too many layers requested"); if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer); @@ -402,7 +372,7 @@ bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay) return true; } -uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform) +uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform) { if (!fMaterial || layer >= fMaterial->GetNumLayers() || !fMaterial->GetLayer(layer)) { return -1; @@ -411,6 +381,8 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme if (false /*ISkipBumpMap(fMaterial, layer)*/) { return -1; } + + memset(passDescription, 0, sizeof(plMetalFragmentShaderDescription)); // Ignoring the bit about ATI Radeon and UVW limits @@ -421,7 +393,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme // Ignoring the bit about self-rendering cube maps plLayerInterface* currLay = /*IPushOverBaseLayer*/ fMaterial->GetLayer(layer); - preEncodeTransform(currLay, 0); + currLay = preEncodeTransform(currLay, 0); if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (currLay->GetMiscFlags() & hsGMatState::kMiscBumpDu)) { currLay = fMaterial->GetLayer(++layer); @@ -471,20 +443,22 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme //ISetBumpMatrices(currLay); } + passDescription->Populate(currLay, 0); postEncodeTransform(currLay, 0); int32_t i = 1; for (i = 1; i < currNumLayers; i++) { - 
plLayerInterface* layPtr = fMaterial->GetLayer(layer + i); if (!layPtr) { return -1; } - preEncodeTransform(layPtr, i); + layPtr = preEncodeTransform(layPtr, i); + + passDescription->Populate(layPtr, i); - postEncodeTransform(layPtr, i); + layPtr = postEncodeTransform(layPtr, i); } if(piggybacks) { @@ -495,12 +469,16 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme if (!layPtr) { return -1; } - preEncodeTransform(layPtr, i + currPiggyback); + layPtr = preEncodeTransform(layPtr, i + currPiggyback); + + passDescription->Populate(layPtr, i + currPiggyback); - postEncodeTransform(layPtr, i + currPiggyback); + layPtr = postEncodeTransform(layPtr, i + currPiggyback); } } + passDescription->numLayers = ( piggybacks ? piggybacks->size() : 0 ) + currNumLayers; + if (state.fBlendFlags & (hsGMatState::kBlendTest | hsGMatState::kBlendAlpha | hsGMatState::kBlendAddColorTimesAlpha) && !(state.fBlendFlags & hsGMatState::kBlendAlphaAlways)) { @@ -520,86 +498,3 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme return layer + currNumLayers; } - -void plMetalMaterialShaderRef::GetSourceArray(uint8_t *array, uint8_t pass) { - memset(array, 0, sizeof(uint8_t) * 8); - - uint16_t currNumLayers = fPassLengths[pass]; - uint16_t baseLayer = fPassIndices[pass]; - uint16_t i = 0; - for (i = 0; i < currNumLayers; i++) - { - plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); - plBitmap* texture = layPtr->GetTexture(); - if (texture != nullptr) { - plMetalTextureRef* texRef = (plMetalTextureRef*)texture->GetDeviceRef(); - if(!texRef->fTexture) - continue; - - plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); - if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { - array[i] = PassTypeCubicTexture; - } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { - array[i] = 
PassTypeTexture; - } else { - array[i] = PassTypeColor; - } - - } else { - array[i] = PassTypeColor; - } - } -} - -void plMetalMaterialShaderRef::GetBlendFlagArray(uint32_t *array, uint8_t pass) { - memset(array, 0, sizeof(uint8_t) * 8); - - uint16_t currNumLayers = fPassLengths[pass]; - uint16_t baseLayer = fPassIndices[pass]; - uint16_t i = 0; - for (i = 0; i < currNumLayers; i++) - { - plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); - array[i] = layPtr->GetBlendFlags(); - } -} - -void plMetalMaterialShaderRef::GetMiscFlagArray(uint32_t *array, uint8_t pass) { - memset(array, 0, sizeof(uint8_t) * 8); - - uint16_t currNumLayers = fPassLengths[pass]; - uint16_t baseLayer = fPassIndices[pass]; - uint16_t i = 0; - for (i = 0; i < currNumLayers; i++) - { - plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); - array[i] = layPtr->GetMiscFlags(); - } -} - -void plMetalMaterialShaderRef::GetSampleTypeArray(uint8_t *array, uint8_t pass) { - memset(array, 0, sizeof(uint8_t) * 8); - - uint16_t currNumLayers = fPassLengths[pass]; - uint16_t baseLayer = fPassIndices[pass]; - uint16_t i = 0; - for (i = 0; i < currNumLayers; i++) - { - plLayerInterface* layPtr = fMaterial->GetLayer(baseLayer + i); - - switch (layPtr->GetClampFlags()) { - case hsGMatState::kClampTextureU: - array[i] = 1; - break; - case hsGMatState::kClampTextureV: - array[i] = 2; - break; - case hsGMatState::kClampTexture: - array[i] = 3; - break; - default: - array[i] = 0; - break; - } - } -} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 6b19d39c2c..814a2fda7c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -45,8 +45,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "hsGMatState.h" #include "plMetalDeviceRef.h" #include "ShaderTypes.h" +#include "plMetalPipelineState.h" #include +#include class hsGMaterial; class plMetalPipeline; @@ -76,9 +78,11 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void CheckMateralRef(); size_t GetNumPasses() const { return fNumPasses; } + size_t GetPassIndex(size_t which) const { return fPassIndices[which]; } + const std::vector GetLayersForPass(size_t pass) { return fPasses[pass]; } - void EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); + void EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription *passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); void FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass); //probably not a good idea to call prepareTextures directly //mostly just a hack to keep plates working for now @@ -90,23 +94,21 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef // fMatOverOff overrides to clear a state bit whether it is set in the layer or not.s const hsGMatState ICompositeLayerState(const plLayerInterface* layer); - static plLayerInterface* Passthrough(plLayerInterface* layer, uint32_t index) { - return layer; + const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) { + return fFragmentShaderDescriptions[which]; } - void GetSourceArray(uint8_t *array, uint8_t pass); - void GetBlendFlagArray(uint32_t *array, uint8_t pass); - void GetMiscFlagArray(uint32_t *array, uint8_t pass); - void GetSampleTypeArray(uint8_t *array, uint8_t pass); private: void ILoopOverLayers(); uint32_t fNumPasses; - uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderArgumentBuffer 
*uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform); + uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform); bool ICanEatLayer(plLayerInterface* lay); uint32_t ILayersAtOnce(uint32_t which); void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); + std::vector> fPasses; + std::vector fFragmentShaderDescriptions; }; #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 361f4324d2..4958bf3a1f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1180,7 +1180,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, size_t pass; for (pass = 0; pass < mRef->GetNumPasses(); pass++) { - if ( IHandleMaterial(material, pass, &span, vRef) ) { + if ( IHandleMaterialPass(material, pass, &span, vRef) ) { render.RenderPrims(); } @@ -1279,7 +1279,7 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM AppendLayerInterface(&layLightBase, false); - IHandleMaterial( material, iPass, &span, vRef, false ); + IHandleMaterialPass( material, iPass, &span, vRef, false ); //FIXME: Hard setting of light IScaleLight(mRef, 7, true); @@ -1394,7 +1394,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) size_t pass; for (pass = 0; pass < mRef->GetNumPasses(); pass++) { - IHandleMaterial(material, pass, &span, vRef); + IHandleMaterialPass(material, pass, &span, vRef); if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) { fCurrentRenderPassUniforms->ambientCol = 
{1.0f, 1.0f, 1.0f, 1.0f}; @@ -1448,7 +1448,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) } -bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) +bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) { plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); @@ -1579,24 +1579,14 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons std::vector& spanLights = currSpan->GetLightList(false); int numActivePiggyBacks = 0; - //FIXME: In the DX source, this check was done on the first layer. Does that mean the first layer of the material or the first layer of the pass? if( !(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive) ) { /// Tack lightmap onto last stage if we have one numActivePiggyBacks = fActivePiggyBacks; - //if( numActivePiggyBacks > fMaxLayersAtOnce - fCurrNumLayers ) - // numActivePiggyBacks = fMaxLayersAtOnce - fCurrNumLayers; } - uint8_t sources[8]; - uint32_t blendModes[8]; - uint32_t miscFlags[8]; - uint8_t sampleTypes[8]; - memset(sources, 0, sizeof(sources)); - memset(blendModes, 0, sizeof(blendModes)); - memset(miscFlags, 0, sizeof(miscFlags)); - memset(sampleTypes, 0, sizeof(sampleTypes)); + struct plMetalFragmentShaderDescription fragmentShaderDescription; lay = IPopOverAllLayer(lay); lay = IPopOverBaseLayer(lay); @@ -1604,53 +1594,20 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons if(numActivePiggyBacks==0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) { mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass); - mRef->GetSourceArray(sources, pass); - mRef->GetBlendFlagArray(blendModes, pass); - 
mRef->GetMiscFlagArray(miscFlags, pass); - mRef->GetSampleTypeArray(sampleTypes, pass); + fragmentShaderDescription = mRef->GetFragmentShaderDescription(pass); } else { //Plasma pulls piggybacks from the rear first, pull the number of active piggybacks auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks; auto lastPiggyback = fPiggyBackStack.end(); std::vector subPiggybacks(firstPiggyback, lastPiggyback); - mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &subPiggybacks, + mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fragmentShaderDescription, &subPiggybacks, [&](plLayerInterface* layer, uint32_t index){ if(index==0) { layer = IPushOverBaseLayer(layer); } layer = IPushOverAllLayer(layer); - plBitmap* texture = layer->GetTexture(); - if (texture != nullptr) { - plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); - if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { - sources[index] = PassTypeCubicTexture; - } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { - sources[index] = PassTypeTexture; - } - - } else { - sources[index] = PassTypeColor; - } - blendModes[index] = layer->GetBlendFlags(); - miscFlags[index] = layer->GetMiscFlags(); - - switch (layer->GetClampFlags()) { - case hsGMatState::kClampTextureU: - sampleTypes[index] = 1; - break; - case hsGMatState::kClampTextureV: - sampleTypes[index] = 2; - break; - case hsGMatState::kClampTexture: - sampleTypes[index] = 3; - break; - default: - sampleTypes[index] = 0; - break; - } - return layer; }, [&](plLayerInterface* layer, uint32_t index){ @@ -1660,15 +1617,8 @@ bool plMetalPipeline::IHandleMaterial(hsGMaterial *material, uint32_t pass, cons return layer; }); } - - struct plMetalMaterialPassDescription passDescription; - memcpy(passDescription.passTypes, 
sources, sizeof(sources)); - memcpy(passDescription.blendModes, blendModes, sizeof(blendModes)); - memcpy(passDescription.miscFlags, miscFlags, sizeof(miscFlags)); - memcpy(passDescription.sampleTypes, sampleTypes, sizeof(sampleTypes)); - passDescription.numLayers = numActivePiggyBacks + mRef->fPassLengths[pass]; - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, fragmentShaderDescription).GetRenderPipelineState(); const MTL::RenderPipelineState *pipelineState = linkedPipeline->pipelineState; /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); @@ -2108,7 +2058,7 @@ void plMetalPipeline::ISetLayer( uint32_t lay ) fCurrRenderLayer = lay; plCONST(int) kBiasMult = 8; - fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, -kBiasMult, -kBiasMult); + fDevice.CurrentRenderCommandEncoder()->setDepthBias(-kBiasMult, -kBiasMult/2, -kBiasMult); } } else if( fCurrRenderLayer != 0 ) @@ -3940,7 +3890,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con // in projecting the shadow map onto the scene. 
ISetupShadowLight(fShadows[i]); - struct plMetalMaterialPassDescription passDescription; + struct plMetalFragmentShaderDescription passDescription; memset(&passDescription, 0, sizeof(passDescription)); passDescription.Populate(mat->GetLayer(0), 2); passDescription.numLayers = 3; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index d0283de53b..2dcbcbed2c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -147,7 +147,7 @@ class plMetalPipeline : public pl3DPipeline uint32_t iStart, uint32_t iLength); void IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux); void IRenderAuxSpans(const plSpan& span); - bool IHandleMaterial(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); + bool IHandleMaterialPass(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); plMetalDevice* GetMetalDevice(); // Create and/or Refresh geometry buffers diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 9727658eef..5f41316836 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -50,6 +50,7 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plPipeline/plCubicRenderTarget.h" #include "plPipeline/plRenderTarget.h" #include "plMetalDevice.h" +#include "plMetalMaterialShaderRef.h" size_t plMetalPipelineState::GetHash() const { return std::hash()(GetID()); @@ -92,24 +93,25 @@ void plMetalPipelineState::PrewarmRenderPipelineState() { } -plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalMaterialPassDescription &description) +plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription &description) : plMetalRenderSpanPipelineState(device, vRef) { - fPassDescription = description; + fFragmentShaderDescription = description; + fFragmentShaderDescription.CacheHash(); } void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const { plMetalRenderSpanPipelineState::GetFunctionConstants(constants); - constants->setConstantValue(&fPassDescription.numLayers, MTL::DataTypeUChar, FunctionConstantNumLayers); - constants->setConstantValues(&fPassDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); - constants->setConstantValues(&fPassDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); - constants->setConstantValues(&fPassDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); - constants->setConstantValues(&fPassDescription.sampleTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSampleTypes, 8)); + constants->setConstantValue(&fFragmentShaderDescription.numLayers, MTL::DataTypeUChar, FunctionConstantNumLayers); + constants->setConstantValues(&fFragmentShaderDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); + constants->setConstantValues(&fFragmentShaderDescription.blendModes, MTL::DataTypeUInt, 
NS::Range(FunctionConstantBlendModes, 8)); + constants->setConstantValues(&fFragmentShaderDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); + constants->setConstantValues(&fFragmentShaderDescription.sampleTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSampleTypes, 8)); } size_t plMetalMaterialPassPipelineState::GetHash() const { std::size_t value = plMetalRenderSpanPipelineState::GetHash(); - value ^= fPassDescription.GetHash(); + value ^= fFragmentShaderDescription.GetHash(); return value; } @@ -305,26 +307,16 @@ const NS::String* plMetalMaterialPassPipelineState::GetDescription() { } void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { - uint32_t blendMode = fPassDescription.blendModes[0]; + uint32_t blendMode = fFragmentShaderDescription.blendModes[0]; ConfigureBlendMode(blendMode, descriptor); } -void plMetalMaterialPassDescription::Populate(plLayerInterface* layPtr, uint8_t index) { - if (layPtr == nullptr) { - blendModes[index] = 0; - miscFlags[index] = 0; - passTypes[index] = 0; - } - +void plMetalFragmentShaderDescription::Populate(plLayerInterface* layPtr, uint8_t index) { blendModes[index] = layPtr->GetBlendFlags(); miscFlags[index] = layPtr->GetMiscFlags(); plBitmap* texture = layPtr->GetTexture(); if (texture != nullptr) { - plMetalTextureRef* texRef = (plMetalTextureRef*)texture->GetDeviceRef(); - if(texRef->fTexture) { - - plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { passTypes[index] = PassTypeCubicTexture; } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { @@ -332,7 +324,6 @@ void plMetalMaterialPassDescription::Populate(plLayerInterface* layPtr, uint8_t } else { passTypes[index] = PassTypeColor; } - } } else { passTypes[index] = PassTypeColor; 
@@ -356,7 +347,7 @@ void plMetalMaterialPassDescription::Populate(plLayerInterface* layPtr, uint8_t } bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { - return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fPassDescription == this->fPassDescription; + return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fFragmentShaderDescription == this->fFragmentShaderDescription; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index d152f67c54..f449c2a4d5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -47,7 +47,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #include "plMetalDevice.h" -#include "plMetalMaterialShaderRef.h" #include "plSurface/plShaderTable.h" class plMetalPipelineState { @@ -112,19 +111,29 @@ class plMetalRenderSpanPipelineState: public plMetalPipelineState { } }; -struct plMetalMaterialPassDescription { +struct plMetalFragmentShaderDescription { uint8_t passTypes[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; uint8_t sampleTypes[8]; uint8_t numLayers; - bool operator==(const plMetalMaterialPassDescription &p) const { + size_t hash; + + bool operator==(const plMetalFragmentShaderDescription &p) const { bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0 && memcmp(sampleTypes, p.sampleTypes, sizeof(sampleTypes)) == 0; return match; } - virtual size_t GetHash() const { + void CacheHash() { + if(!hash) + hash = GetHash(); + } + + size_t GetHash() const { + if(hash) + return hash; + std::size_t value = std::hash()(numLayers); value ^= std::hash()(numLayers); @@ -151,9 +160,9 @@ struct 
plMetalMaterialPassDescription { }; template<> -struct std::hash +struct std::hash { - std::size_t operator()(plMetalMaterialPassDescription const& s) const noexcept + std::size_t operator()(plMetalFragmentShaderDescription const& s) const noexcept { return s.GetHash(); } @@ -161,7 +170,7 @@ struct std::hash class plMetalMaterialPassPipelineState: public plMetalRenderSpanPipelineState { public: - plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, const plMetalMaterialPassDescription &description); + plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, const plMetalFragmentShaderDescription &description); virtual size_t GetHash() const override; MTL::Function* GetVertexFunction(MTL::Library* library) override; MTL::Function* GetFragmentFunction(MTL::Library* library) override; @@ -180,7 +189,7 @@ class plMetalMaterialPassPipelineState: public plMetalRenderSpanPipelineState { ~plMetalMaterialPassPipelineState(); virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const override; protected: - plMetalMaterialPassDescription fPassDescription; + plMetalFragmentShaderDescription fFragmentShaderDescription; }; class plMetalRenderShadowCasterPipelineState: public plMetalRenderSpanPipelineState { @@ -211,7 +220,7 @@ class plMetalRenderShadowCasterPipelineState: public plMetalRenderSpanPipelineSt class plMetalRenderShadowPipelineState: public plMetalMaterialPassPipelineState { public: - plMetalRenderShadowPipelineState(plMetalDevice* device, plMetalVertexBufferRef *vRef, const plMetalMaterialPassDescription &description) + plMetalRenderShadowPipelineState(plMetalDevice* device, plMetalVertexBufferRef *vRef, const plMetalFragmentShaderDescription &description) : plMetalMaterialPassPipelineState(device, vRef, description) { } From 0ff0d1aabaa6a03afb435534a7d47836d130f6f4 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 22 Jul 2022 22:09:06 -0700 Subject: [PATCH 078/165] Early 
version of Metal font rendering Still has known issues to fix --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 2 + .../ShaderSrc/TextFontShader.metal | 98 +++++ .../pfMetalPipeline/plMetalTextFont.cpp | 380 ++++++++++++++++++ .../pfMetalPipeline/plMetalTextFont.h | 85 ++++ 5 files changed, 566 insertions(+) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index a851657e01..e361b6e9c8 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -131,6 +131,7 @@ elseif(APPLE) ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal ../../FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal + ../../FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal ) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index 4552e1cabb..ff4c52816c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -19,6 +19,7 @@ set(pfMetalPipeline_SOURCES plMetalShader.cpp plMetalFragmentShader.cpp plMetalVertexShader.cpp + plMetalTextFont.cpp plMetalEnumerate.mm ) @@ -30,6 +31,7 @@ set(pfMetalPipeline_HEADERS plMetalPipelineState.h plMetalPlateManager.h plMetalShader.h + plMetalTextFont.h plMetalFragmentShader.h plMetalVertexShader.h ShaderSrc/ShaderTypes.h diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal new file mode 100644 index 0000000000..8d07e91daa --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal @@ -0,0 +1,98 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +using namespace metal; +// File for Metal kernel and shader functions + +#include +#include + +// Including header shared between this Metal shader code and Swift/C code executing Metal API commands +#import "ShaderTypes.h" + + +using namespace metal; + +typedef struct +{ + packed_float3 position; + uchar4 color; + packed_float3 UV; +} Vertex; + +typedef struct +{ + float4 position [[position]]; + float3 texCoord; + float4 normal; + half4 color; +} ColorInOut; + +vertex ColorInOut textFontVertexShader(constant Vertex *in [[ buffer(0) ]], + constant matrix_float4x4 & transform [[ buffer(1) ]], + uint v_id [[vertex_id]]) +{ + ColorInOut out; + + Vertex vert = in[v_id]; + float4 position = float4(vert.position, 1.0); + out.position = (transform * position); + out.texCoord = vert.UV; + out.normal = float4(0.0, 0.0, 1.0, 0.0); + out.color = half4(vert.color.b, vert.color.g, vert.color.r, vert.color.a) / 255.0f; + + return out; +} + +fragment half4 textFontFragmentShader(ColorInOut in [[stage_in]], + texture2d colorMap [[ texture(0) ]]) +{ + constexpr sampler colorSampler(mip_filter::nearest, + mag_filter::nearest, + min_filter::nearest); + + half4 colorSample = colorMap.sample(colorSampler, in.texCoord.xy); + colorSample *= in.color; + + return colorSample; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp new file mode 100644 index 0000000000..cccd9f82f3 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp @@ -0,0 +1,380 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. 
+ +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +/////////////////////////////////////////////////////////////////////////////// +// // +// plDXTextFont Class Functions // +// Cyan, Inc. // +// // +//// Version History ////////////////////////////////////////////////////////// +// // +// 2.19.2001 mcn - Created. 
// +// // +/////////////////////////////////////////////////////////////////////////////// + +#include "plMetalTextFont.h" + +#include "HeadSpin.h" +#include "hsWindows.h" + +#include "plMetalPipeline.h" +#include "plPipeline/hsWinRef.h" + + +// Following number needs to be at least: 64 chars max in plTextFont drawn at any one time +// * 4 primitives per char max (for bold text) +// * 3 verts per primitive + +//const uint32_t kNumVertsInBuffer(32768); +const uint32_t kNumVertsInBuffer(4608); + +uint32_t plMetalTextFont::fBufferCursor = 0; +MTL::RenderPipelineState* plMetalTextFont::fRenderState = nullptr; + +//// Constructor & Destructor ///////////////////////////////////////////////// + +plMetalTextFont::plMetalTextFont( plPipeline *pipe, MTL::Device *device ) : plTextFont( pipe ), fTexture() +{ + fDevice = device; + fPipeline = (plMetalPipeline *)pipe; + CreateShared(&(fPipeline->fDevice)); +} + +plMetalTextFont::~plMetalTextFont() +{ + DestroyObjects(); +} + +//// ICreateTexture /////////////////////////////////////////////////////////// + +void plMetalTextFont::ICreateTexture( uint16_t *data ) +{ + printf("Create texture\n"); + + MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Unorm, fTextureWidth, fTextureHeight, false); + + fTexture->release(); + fTexture = fDevice->newTexture(descriptor); + fTexture->setLabel(NS::MakeConstantString("Font texture")); + + struct InDataValues { + uint8_t a: 4; + uint8_t r: 4; + uint8_t g: 4; + uint8_t b: 4; + }; + + struct OutDataValues { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint32_t *outData = new uint32_t[fTextureWidth * fTextureHeight]; + for(int i = 0; i < fTextureWidth * fTextureHeight; i++) { + InDataValues *in = (InDataValues *)(data + i); + OutDataValues *out = (OutDataValues *)(outData + i); + + out->r = in->r * 255; + out->b = in->b * 255; + out->g = in->g * 255; + out->a = in->a * 255; + } + + fTexture->replaceRegion(MTL::Region(0, 0, 
fTextureWidth, fTextureHeight), 0, outData, 4 * fTextureWidth); + delete[] outData; + /* + HRESULT hr; + D3DLOCKED_RECT lockInfo; + D3DCAPS9 d3dCaps; + + + // Check to make sure we can support it + fDevice->GetDeviceCaps( &d3dCaps ); + hsAssert( fTextureWidth <= d3dCaps.MaxTextureWidth, "Cannot initialize DX font--texture size too big" ); + + // Create our texture object + hr = fDevice->CreateTexture(fTextureWidth, fTextureHeight, 1, 0, D3DFMT_A4R4G4B4, D3DPOOL_MANAGED, &fD3DTexture, nullptr); + hsAssert( !FAILED( hr ), "Cannot create D3D texture" ); + + // Lock the texture and write our values out + fD3DTexture->LockRect(0, &lockInfo, nullptr, 0); + memcpy( lockInfo.pBits, data, fTextureWidth * fTextureHeight * sizeof( uint16_t ) ); + fD3DTexture->UnlockRect( 0 ); + */ +} + +void plMetalTextFont::CreateShared(plMetalDevice* device) +{ + MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init()->autorelease(); + MTL::Library* library = device->fMetalDevice->newDefaultLibrary(); + + MTL::Function* vertFunction = library->newFunction(NS::MakeConstantString("textFontVertexShader")); + MTL::Function* fragFunction = library->newFunction(NS::MakeConstantString("textFontFragmentShader")); + + descriptor->setVertexFunction(vertFunction); + descriptor->setFragmentFunction(fragFunction); + descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); + descriptor->setSampleCount(device->fSampleCount); + descriptor->setDepthAttachmentPixelFormat(MTL::PixelFormatDepth32Float_Stencil8); + + descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + + NS::Error* error; + fRenderState = device->fMetalDevice->newRenderPipelineState(descriptor, &error); +} + +void 
plMetalTextFont::ReleaseShared(MTL::Device* device) +{ +} + +//// IInitStateBlocks ///////////////////////////////////////////////////////// + +void plMetalTextFont::IInitStateBlocks() +{ +/* + for( int i = 0; i < 2; i++ ) + { + fDevice->BeginStateBlock(); + fDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE ); + fDevice->SetRenderState( D3DRS_SRCBLEND, D3DBLEND_SRCALPHA ); + fDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA ); + fDevice->SetRenderState( D3DRS_ALPHATESTENABLE, TRUE ); + fDevice->SetRenderState( D3DRS_ALPHAREF, 0x08 ); + fDevice->SetRenderState( D3DRS_ALPHAFUNC, D3DCMP_GREATEREQUAL ); + fDevice->SetRenderState( D3DRS_FILLMODE, D3DFILL_SOLID ); + fDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_CCW ); + + fDevice->SetRenderState( D3DRS_ZENABLE, TRUE ); + fDevice->SetRenderState( D3DRS_ZFUNC, D3DCMP_ALWAYS ); + fDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE ); + fDevice->SetRenderState( D3DRS_DEPTHBIAS, 0 ); + + fDevice->SetRenderState( D3DRS_STENCILENABLE, FALSE ); + fDevice->SetRenderState( D3DRS_CLIPPING, TRUE ); + fDevice->SetRenderState( D3DRS_ANTIALIASEDLINEENABLE, FALSE ); + fDevice->SetRenderState( D3DRS_VERTEXBLEND, FALSE ); + fDevice->SetRenderState( D3DRS_INDEXEDVERTEXBLENDENABLE, FALSE ); + fDevice->SetRenderState( D3DRS_FOGENABLE, FALSE ); + fDevice->SetTextureStageState( 0, D3DTSS_COLOROP, D3DTOP_MODULATE ); + fDevice->SetTextureStageState( 0, D3DTSS_COLORARG1, D3DTA_TEXTURE ); + fDevice->SetTextureStageState( 0, D3DTSS_COLORARG2, D3DTA_DIFFUSE ); + fDevice->SetTextureStageState( 0, D3DTSS_ALPHAOP, D3DTOP_MODULATE ); + fDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE ); + fDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE ); + fDevice->SetSamplerState( 0, D3DSAMP_MINFILTER, D3DTEXF_POINT ); + fDevice->SetSamplerState( 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT ); + fDevice->SetSamplerState( 0, D3DSAMP_MIPFILTER, D3DTEXF_NONE ); + fDevice->SetTextureStageState( 0, D3DTSS_TEXCOORDINDEX, 0 ); + 
fDevice->SetTextureStageState( 0, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2 ); + fDevice->SetTextureStageState( 1, D3DTSS_COLOROP, D3DTOP_DISABLE ); + fDevice->SetTextureStageState( 1, D3DTSS_ALPHAOP, D3DTOP_DISABLE ); + fDevice->SetRenderState( D3DRS_LIGHTING, FALSE ); + + if( i == 0 ) + fDevice->EndStateBlock( &fOldStateBlock ); + else + fDevice->EndStateBlock( &fTextStateBlock ); + } + */ +} + +//// DestroyObjects /////////////////////////////////////////////////////////// + +void plMetalTextFont::DestroyObjects() +{ + fInitialized = false; +} + +//// IDrawPrimitive /////////////////////////////////////////////////////////// + +void plMetalTextFont::IDrawPrimitive( uint32_t count, plFontVertex *array ) +{ + plFontVertex *v; + + fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(fRenderState); + const uint maxCount = 4096/(sizeof(plFontVertex) * 3); + uint drawm = 0; + while(count > 0) { + uint drawCount = MIN(maxCount, count); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawm * 3), drawCount * 3 * sizeof( plFontVertex ), 0); + + fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), drawCount * 3); + + count -= drawCount; + drawm += drawCount; + } + + //if( !fBuffer ) + // return; + + /// Lock the buffer and write to it + /*if( fBufferCursor && (fBufferCursor + count * 3 < kNumVertsInBuffer) ) + { + // We can lock part of it + if( FAILED( fBuffer->Lock( fBufferCursor * sizeof( plFontVertex ), + count * 3 * sizeof( plFontVertex ), + (void **)&v, D3DLOCK_NOOVERWRITE ) ) ) + { + hsAssert( false, "Failed to lock vertex buffer for writing" ); + return; + } + + fBufferCursor += count * 3; + } + else + { + // Gotta start over + FlushDraws(); + fBufferCursor = count * 3; + + if( FAILED( fBuffer->Lock( 0, count * 3 * sizeof( plFontVertex ), + (void **)&v, D3DLOCK_DISCARD ) ) ) + { + hsAssert( false, "Failed to lock vertex buffer for writing" ); + return; + } + 
} + + if (v != nullptr && array != nullptr) + { + memcpy( v, array, count * sizeof( plFontVertex ) * 3 ); + } + + fBuffer->Unlock();*/ +} + +//// IDrawLines /////////////////////////////////////////////////////////////// + +void plMetalTextFont::IDrawLines( uint32_t count, plFontVertex *array ) +{ + + fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(fRenderState); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array, count * 2 * sizeof( plFontVertex ), 0); + + matrix_float4x4 mat = matrix_identity_float4x4; + mat.columns[0][0] = 2.0f / (float)fPipe->Width(); + mat.columns[1][1] = -2.0f / (float)fPipe->Height(); + mat.columns[3][0] = -1.0; + mat.columns[3][1] = 1.0; + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof( matrix_float4x4 ), 1); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0); + + fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeLine, NS::UInteger(0), count * 2); + /*if( !fBuffer ) + return; + + if (count == 0 || array == nullptr) + return; + + fDevice->SetVertexShader(nullptr); + fDevice->SetFVF(kFVF); + fDevice->SetStreamSource(0, fBuffer, 0, sizeof(plFontVertex)); + fDevice->DrawPrimitiveUP( D3DPT_LINELIST, count, (const void *)array, sizeof( plFontVertex ) );*/ +} + +//// FlushDraws /////////////////////////////////////////////////////////////// +// Flushes out and finishes any drawing left to be done. 
+ +void plMetalTextFont::FlushDraws() +{ + /*if( !fBuffer ) + return; + + if( fBufferCursor > 0 ) + { + fDevice->SetVertexShader(nullptr); + fDevice->SetFVF(kFVF); + fDevice->SetStreamSource( 0, fBuffer, 0, sizeof( plFontVertex ) ); + fDevice->DrawPrimitive( D3DPT_TRIANGLELIST, 0, fBufferCursor / 3 ); + fBufferCursor = 0; + }*/ +} + +//// SaveStates /////////////////////////////////////////////////////////////// + +void plMetalTextFont::SaveStates() +{ + + matrix_float4x4 mat = matrix_identity_float4x4; + mat.columns[0][0] = 2.0f / (float)fPipe->Width(); + mat.columns[1][1] = -2.0f / (float)fPipe->Height(); + mat.columns[3][0] = -1.0; + mat.columns[3][1] = 1.0; + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof( matrix_float4x4 ), 1); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0); + /*if( !fInitialized ) + IInitObjects(); + + if (fOldStateBlock) + fOldStateBlock->Capture(); + if (fTextStateBlock) + fTextStateBlock->Apply(); + + fDevice->SetTexture( 0, fD3DTexture ); + fDevice->SetTransform( D3DTS_TEXTURE0, &d3dIdentityMatrix ); + + /// Set up the transform matrices so that the vertices can range (0-screenWidth,0-screenHeight) + fDevice->SetTransform( D3DTS_WORLD, &d3dIdentityMatrix ); + fDevice->SetTransform( D3DTS_VIEW, &d3dIdentityMatrix ); + D3DMATRIX mat; + mat = d3dIdentityMatrix; + mat.m[0][0] = 2.0f / (float)fPipe->Width(); + mat.m[1][1] = -2.0f / (float)fPipe->Height(); + mat.m[3][0] = -1.0; + mat.m[3][1] = 1.0; + fDevice->SetTransform( D3DTS_PROJECTION, &mat );*/ +} + +//// RestoreStates //////////////////////////////////////////////////////////// + +void plMetalTextFont::RestoreStates() +{ + /*if (fOldStateBlock) + fOldStateBlock->Apply(); + + fDevice->SetTexture(0, nullptr); + fDevice->SetTransform( D3DTS_TEXTURE0, &d3dIdentityMatrix );*/ +} + diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h new file 
mode 100644 index 0000000000..17e6a57858 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -0,0 +1,85 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ +#ifndef _plDXTextFont_h +#define _plDXTextFont_h + +#include "plPipeline/plTextFont.h" +#include "plMetalPipeline.h" +#include + + +//// plDXTextFont Class Definition /////////////////////////////////////////// + +class plPipeline; + +class plMetalTextFont : public plTextFont +{ +protected: + static uint32_t fBufferCursor; + static MTL::RenderPipelineState* fRenderState; + + void ICreateTexture(uint16_t *data) override; + void IInitStateBlocks() override; + void IDrawPrimitive(uint32_t count, plFontVertex *array) override; + void IDrawLines(uint32_t count, plFontVertex *array) override; + + MTL::Texture* fTexture; + MTL::Device* fDevice; + + plMetalPipeline* fPipeline; + +public: + plMetalTextFont( plPipeline *pipe, MTL::Device *device ); + ~plMetalTextFont(); + + static void CreateShared(plMetalDevice* device); + static void ReleaseShared(MTL::Device* device); + + void FlushDraws() override; + void SaveStates() override; + void RestoreStates() override; + void DestroyObjects() override; +}; + + +#endif // _plDXTextFont_h + From bbbab15860fc82d72295b1863cfd319795dfd744 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 1 Oct 2023 23:18:19 -0700 Subject: [PATCH 079/165] Hooking in font rendering --- .../pfMetalPipeline/plMetalPipeline.cpp | 18 ++++++++++++++++-- .../pfMetalPipeline/plMetalPipeline.h | 4 +++- 2 files changed, 19 insertions(+), 3 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 4958bf3a1f..7f104376a5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -74,6 +74,7 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plgDispatch.h" #include "plDrawable/plAuxSpan.h" #include "plSurface/plLayerShadowBase.h" +#include "plMetalTextFont.h" #include "plGImage/plMipmap.h" #include "plGImage/plCubicEnvironmap.h" @@ -162,6 +163,7 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons fVtxBuffRefList = nullptr; fIdxBuffRefList = nullptr; fMatRefList = nullptr; + fTextFontRefList = nullptr; fCurrLayerIdx = 0; fDevice.fPipeline = this; @@ -277,8 +279,17 @@ bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector & return true; } -plTextFont *plMetalPipeline::MakeTextFont(char *face, uint16_t size) { - return nullptr; +plTextFont *plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) { + plTextFont *font; + + + font = new plMetalTextFont( this, fDevice.fMetalDevice ); + if (font == nullptr) + return nullptr; + font->Create( face, size ); + font->Link( &fTextFontRefList ); + + return font; } bool plMetalPipeline::OpenAccess(plAccessSpan &dst, plDrawableSpans *d, const plVertexSpan *span, bool readOnly) { return false; } @@ -3000,6 +3011,9 @@ void plMetalPipeline::IReleaseDynDeviceObjects() // themselves from their parent objects yet delete fDebugTextMgr; fDebugTextMgr = nil; + + while( fTextFontRefList ) + delete fTextFontRefList; while( fRenderTargetRefList ) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 2dcbcbed2c..0ad3bff289 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -115,7 +115,7 @@ class plMetalPipeline : public pl3DPipeline /*** VIRTUAL METHODS ***/ bool PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr=nullptr) override; bool PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr=nullptr) override; - plTextFont* MakeTextFont(char* face, uint16_t size) override; + 
plTextFont* MakeTextFont(ST::string face, uint16_t size) override; bool OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) override; bool CloseAccess(plAccessSpan& acc) override; void PushRenderRequest(plRenderRequest* req) override; @@ -256,6 +256,8 @@ class plMetalPipeline : public pl3DPipeline static plMetalEnumerate enumerator; + plTextFont* fTextFontRefList; + NS::AutoreleasePool* fCurrentPool; struct plMetalPipelineCurrentState { From e11c4dd457cfb95fe9eb7f01cb5019a631d11c4f Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 22 Jul 2022 22:16:54 -0700 Subject: [PATCH 080/165] Making Metal text font a friend of pipeline --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 0ad3bff289..f3d6e0b2e7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -97,6 +97,7 @@ class plMetalPipeline : public pl3DPipeline friend class plMetalPlateManager; friend class plMetalMaterialShaderRef; friend class plRenderTriListFunc; + friend class plMetalTextFont; plMetalMaterialShaderRef* fMatRefList; plMetalRenderTargetRef* fRenderTargetRefList; From 9f24e42e6563f44e1dd1b969bbbcdecc870f9389 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 23 Jul 2022 15:15:43 -0700 Subject: [PATCH 081/165] =?UTF-8?q?Fixing=20reflections=20in=20K=E2=80=99v?= =?UTF-8?q?eer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reflections were having shadows rendered on them with out of range shadow values. This clamps the values so they can’t be out of range. The DX implementation used a LUT texture here that implicitly clamped the range. 
--- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 30f8a2f6aa..c34bcbde88 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -733,7 +733,7 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], currentColor.rgb *= in.vtxColor.rgb; const float2 LUTCoords = in.texCoord2.xy; - const half4 LUTColor = half4(LUTCoords.x); + const half4 LUTColor = clamp(half4(LUTCoords.x), 0.0h, 1.0h);; currentColor.rgb = (1.0h - LUTColor.rgb) * currentColor.rgb; currentColor.a = LUTColor.a - currentColor.a; From 64e61884bde565ce180328d38ce6d0f95383a69f Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 24 Jul 2022 15:19:09 -0700 Subject: [PATCH 082/165] Fixes for age anomalies Fixing case where age UV index was encoded out of range (Great Tree Pub), and texture had no usable mipmap levels (GoMePubNew) --- .../ShaderSrc/FixedPipelineShaders.metal | 7 ++++++- .../FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 14 +++++++++++++- .../FeatureLib/pfMetalPipeline/plMetalDeviceRef.h | 2 +- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index c34bcbde88..f6a3e956a2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -448,7 +448,12 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co default: { const int index = UVWSrc & 0x0F; - sampleCoord = matrix * float4(texCoords[index], 1.0); + 
if (index < num_uvs) { + sampleCoord = matrix * float4(texCoords[index], 1.0); + } else { + //The DX engine will use a UV co-ord of 0,0 if the index is out of range + sampleCoord = float4(0.0); + } } break; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 2db035b46b..552eb0256f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -761,6 +761,10 @@ uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMi while ((mipmap->GetCurrWidth() | mipmap->GetCurrHeight()) & 0x03) { tRef->fLevels--; hsAssert(tRef->fLevels >= 0, "How was this ever compressed?" ); + if(tRef->fLevels < 0) { + tRef->fLevels = -1; + break; + } mipmap->SetCurrLevel(tRef->fLevels); } } @@ -786,6 +790,11 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *i uint height = tRef->fTexture->height(); #endif + if (tRef->fLevels == -1) { + hsAssert(1, "Bad texture found"); + return; + } + for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { img->SetCurrLevel(lvl); #if HACK_LEVEL_SIZE @@ -857,8 +866,11 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* im tRef->fLevels = img->GetNumLevels() - 1; //if(!tRef->fTexture) { ConfigureAllowedLevels(tRef, img); + + bool textureIsValid = tRef->fLevels > 0; + //texture doesn't exist yet, create it - bool supportsMipMap = tRef->fLevels; + bool supportsMipMap = tRef->fLevels && textureIsValid; MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), img->GetHeight(), supportsMipMap); descriptor->setUsage(MTL::TextureUsageShaderRead); //if device has unified memory, set storage mode to shared diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index 78825c3d63..4a58756a9a 
100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -247,7 +247,7 @@ class plMetalTextureRef : public plMetalDeviceRef public: plBitmap* fOwner; - uint32_t fLevels; + int32_t fLevels; MTL::Texture* fTexture; MTL::PixelFormat fFormat; From 4f2e935bdbdd63412eb3f472a10693d4e8e8eba9 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 27 Jul 2022 22:18:20 -0700 Subject: [PATCH 083/165] Simple changes for Gamma correction --- .../ShaderSrc/GammaCorrection.metal | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal index da2290a278..884b7a1fc1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal @@ -65,9 +65,8 @@ vertex GammaVertexOut gammaCorrectVertex(constant GammaVertexIn *in [[ buffer(0) return out; } -const constant sampler sourceSampler = sampler(); const constant sampler lutSampler = sampler( - filter::nearest + filter::linear ); fragment half4 gammaCorrectFragment( @@ -76,10 +75,11 @@ fragment half4 gammaCorrectFragment( texture1d_array LUT [[texture(1)]] ) { - float4 color = inputTexture.sample(sourceSampler, in.texturePosition); - half4 out = half(1); - out.r = half(float(LUT.sample(lutSampler, color.r, 0).x)/USHRT_MAX); - out.g = half(float(LUT.sample(lutSampler, color.g, 1).x)/USHRT_MAX); - out.b = half(float(LUT.sample(lutSampler, color.b, 2).x)/USHRT_MAX); - return out; + float4 color = inputTexture.read(ushort2(in.position.xy)); + return { + half(float(LUT.sample(lutSampler, color.r, 0).x)/USHRT_MAX), + half(float(LUT.sample(lutSampler, color.g, 1).x)/USHRT_MAX), + half(float(LUT.sample(lutSampler, color.b, 2).x)/USHRT_MAX), + 1.0 + }; } From 
2920f37a2c579d75951c957ad1e62c0d6be45279 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 27 Jul 2022 22:19:27 -0700 Subject: [PATCH 084/165] Deleting numUVSrcs --- .../Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h | 1 - .../FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp | 2 -- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 1 - 3 files changed, 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 0e8d8d2fe1..160f8b3f2e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -158,7 +158,6 @@ typedef struct plMetalShaderLightSource lampSources[8]; - uint8_t numUVSrcs; UVOutDescriptor uvTransforms[8]; #ifdef __METAL_VERSION__ float3 sampleLocation(size_t index, thread float3 *texCoords, const float4 normal, const float4 camPosition) constant; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index d3bb7f3ab3..7a9ee75ed2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -148,8 +148,6 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode layers.insert(layers.end(), piggyBacks->begin(), piggyBacks->end()); } - vertexUniforms->numUVSrcs = layers.size(); - plMetalFragmentShaderArgumentBuffer uniforms; IHandleMaterial(GetPassIndex(pass), passDescription, &uniforms, piggyBacks, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 7f104376a5..4c49e72db3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2547,7 +2547,6 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) matrix_float4x4 modelMatrix; uniforms.worldToCameraMatrix = modelMatrix; uniforms.uvTransforms[0].UVWSrc = 0; - uniforms.numUVSrcs = 1; //uniforms.worldToLocalMatrix = fDevice.fMatrixW2L; //flip world to camera, it's upside down From 562043768cc624b93b45ad2132c683711e1fa084 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 27 Jul 2022 22:32:16 -0700 Subject: [PATCH 085/165] Initial version of shadow blurring --- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 1 + .../pfMetalPipeline/plMetalDevice.cpp | 3 + .../pfMetalPipeline/plMetalDevice.h | 3 + .../plMetalDevicePerformanceShaders.mm | 66 +++++++++++++++++++ .../pfMetalPipeline/plMetalPipeline.cpp | 18 ++--- 5 files changed, 79 insertions(+), 12 deletions(-) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index ff4c52816c..cd55e74749 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -21,6 +21,7 @@ set(pfMetalPipeline_SOURCES plMetalVertexShader.cpp plMetalTextFont.cpp plMetalEnumerate.mm + plMetalDevicePerformanceShaders.mm ) set(pfMetalPipeline_HEADERS diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 552eb0256f..93f1a0af5b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -1144,6 +1144,9 @@ bool plMetalDevice::plMetalPipelineRecord::operator==(const plMetalPipelineRecor MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() { + if(fCurrentOffscreenCommandBuffer) { + return fCurrentOffscreenCommandBuffer; + } return 
fCurrentCommandBuffer; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 9b9ff552c2..760260c209 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -242,6 +242,9 @@ class plMetalDevice void BeginNewRenderPass(); void ReleaseSamplerStates(); + + //Blur states + std::unordered_map fBlurShaders; }; #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm new file mode 100644 index 0000000000..0efe644dba --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -0,0 +1,66 @@ +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ + +#include +#include "plMetalDevice.h" +#include +#include + +void plMetalDevice::EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma) +{ + //FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more buffers + //Technically shadow blurring only happens at the end of the render pass though... 
+ CurrentRenderCommandEncoder()->endEncoding(); + fCurrentRenderTargetCommandEncoder = nil; + + //look up the shader by sigma value + MPSImageGaussianBlur *blur = (MPSImageGaussianBlur *)fBlurShaders[sigma]; + + //we don't have one, need to create one + if (!blur) { + blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id)fMetalDevice sigma:sigma]; + fBlurShaders[sigma] = (NS::Object*)blur; + } + [blur encodeToCommandBuffer:(id)commandBuffer inPlaceTexture:(id*)&texture fallbackCopyAllocator:^ id (MPSKernel * kernel, id commandBuffer, id texture) { + return (id)fMetalDevice->newTexture((MTL::TextureDescriptor*)texture.description); + }]; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 4c49e72db3..72da3ba0f9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -525,7 +525,7 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) textureDescriptor->setWidth(owner->GetWidth()); textureDescriptor->setHeight(owner->GetHeight()); textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); - textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); textureDescriptor->setStorageMode(MTL::StorageModePrivate); plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef(); @@ -3206,10 +3206,9 @@ bool plMetalPipeline::IRenderShadowCaster(plShadowSlave* slave) slave->fBlurScale = blurScale; // If this shadow requests being blurred, do it. 
- //TODO: Shadow blurring - //if( slave->fBlurScale > 0.f ) - //IBlurShadowMap(slave); - + if( slave->fBlurScale > 0.f ) + fDevice.EncodeBlur(fDevice.GetCurrentCommandBuffer(), fDevice.fCurrentFragmentOutputTexture, slave->fBlurScale); + // Finished up, restore previous state. IPopShadowCastState(slave); @@ -3637,7 +3636,8 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe } MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), owner->GetHeight(), false); - textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + //Give compute shader write access + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); if( texture ) { @@ -3993,8 +3993,6 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) fState.fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; } - int numUVSrcs = 2; - int layerIndex = -1; // If mat's base layer is alpha'd, and we have > 3 TMU's factor // in the base layer's alpha. 
@@ -4026,13 +4024,9 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) matrix_float4x4 tXfm; hsMatrix2SIMD(layer->GetTransform(), &tXfm); fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm; - - numUVSrcs++; } fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowAlphaSrc); - - fCurrentRenderPassUniforms->numUVSrcs = numUVSrcs; } // ISetShadowLightState ////////////////////////////////////////////////////////////////// From 22d96d0699acbe21a68d440b9252b3700a9484e1 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 27 Jul 2022 22:33:26 -0700 Subject: [PATCH 086/165] More blurring --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 760260c209..0d20096eef 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -180,6 +180,9 @@ class plMetalDevice return fSampleCount; } } + + void EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma); + private: struct plMetalPipelineRecord { From 9d551c0f48a1eddd774abfd0d4fc13a3e7d2e632 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 27 Jul 2022 22:33:48 -0700 Subject: [PATCH 087/165] Initial version of Metal over all projections --- .../pfMetalPipeline/plMetalPipeline.cpp | 106 +++++++++++++++--- .../pfMetalPipeline/plMetalPipeline.h | 8 +- 2 files changed, 95 insertions(+), 19 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 72da3ba0f9..5c87ce6a2d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1239,8 +1239,11 @@ void 
plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, if( pass >= 0 ) { // Projections that get applied to the frame buffer (after all passes). - //if( fLights.fProjAll.GetCount() && !(fView.fRenderState & kRenderNoProjection) ) - // IRenderProjections(render); + if( fProjAll.size() && !(fView.fRenderState & kRenderNoProjection) ) { + fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::MakeConstantString("Render All Projections")); + IRenderProjections(render, vRef); + fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); + } // Handle render of shadows onto geometry. if( fShadows.size() ) { @@ -1261,6 +1264,75 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, #endif } +// IRenderProjections /////////////////////////////////////////////////////////// +// Render any projected lights that want to be rendered a single time after +// all passes on the object are complete. +void plMetalPipeline::IRenderProjections(const plRenderPrimFunc& render, const plMetalVertexBufferRef* vRef) +{ + PushCurrentLightSources(); + IDisableLightsForShadow(); + for (plLightInfo* li : fProjAll) + { + IRenderProjection(render, li, vRef); + } + PopCurrentLightSources(); +} + +// IRenderProjection ////////////////////////////////////////////////////////////// +// Render this light's projection onto the frame buffer. +void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef) +{ + // Enable the projecting light only. 
+ IEnableLight(7, li); + + plLayerInterface* proj = li->GetProjection(); + CheckTextureRef(proj); + plMetalTextureRef* tex = (plMetalTextureRef*)proj->GetTexture()->GetDeviceRef(); + + IScaleLight(7, true); + + fCurrentRenderPassUniforms->ambientCol = half4(0.0); + fCurrentRenderPassUniforms->ambientSrc = 1.0; + fCurrentRenderPassUniforms->diffuseSrc = 1.0; + fCurrentRenderPassUniforms->emissiveSrc = 1.0; + fCurrentRenderPassUniforms->specularSrc = 1.0; + fCurrentRenderPassUniforms->fogValues = {0.0, 0.0f}; + fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; + + + matrix_float4x4 tXfm; + hsMatrix2SIMD(proj->GetTransform(), &tXfm); + fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; + fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = proj->GetUVWSrc(); + + fCurrNumLayers = 1; + // We should have put ZNoZWrite on during export, but we didn't. + IHandleZMode(hsGMatState::kZNoZWrite); + + //This is a bit weird - in since this isn't a material we need to build a query for the right Metal program ourselves + plMetalFragmentShaderDescription description; + memset(&description, 0, sizeof(description)); + description.numLayers = 1; + + description.Populate(proj, 0); + //DX sets the color invert when the final color should be inverted. Not sure why! + if( proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor ) { + description.blendModes[0] |= hsGMatState::kBlendInvertColor; + } + + plMetalMaterialPassPipelineState materialShaderState(&fDevice, vRef, description); + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = materialShaderState.GetRenderPipelineState(); + + fState.fCurrentPipelineState = linkedPipeline->pipelineState; + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(tex->fTexture, 0); + + // Okay, render it already. 
+ + render.RenderPrims(); +} + // IRenderProjectionEach /////////////////////////////////////////////////////////////////////////////////////// // Render any lights that are to be projected onto each pass of the object. void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef) @@ -1286,15 +1358,14 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM IPushProjPiggyBack(proj); // Enable the projecting light only. - IEnableLight(mRef, 7, li); + IEnableLight(7, li); AppendLayerInterface(&layLightBase, false); IHandleMaterialPass( material, iPass, &span, vRef, false ); //FIXME: Hard setting of light - IScaleLight(mRef, 7, true); - //mRef->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, iPass, fActivePiggyBacks, &fPiggyBackStack, fOverBaseLayer); + IScaleLight(7, true); // Do the render with projection. render.RenderPrims(); @@ -1302,7 +1373,7 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM RemoveLayerInterface(&layLightBase, false); // Disable the projecting light - IDisableLight(mRef, 7); + IDisableLight(7); // Pop it's projected texture off piggyback IPopProjPiggyBacks(); @@ -2069,7 +2140,10 @@ void plMetalPipeline::ISetLayer( uint32_t lay ) fCurrRenderLayer = lay; plCONST(int) kBiasMult = 8; - fDevice.CurrentRenderCommandEncoder()->setDepthBias(-kBiasMult, -kBiasMult/2, -kBiasMult); + static float mult [[gnu::used]] = -8.0; + static float constBias [[gnu::used]] = -0.0; + static float max [[gnu::used]] = -0.00001; + fDevice.CurrentRenderCommandEncoder()->setDepthBias(constBias, mult, max); } } else if( fCurrRenderLayer != 0 ) @@ -2362,7 +2436,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef // If these are non-projected lights, go ahead and enable them. 
if( !proj ) { - IEnableLight(mRef, i, spanLights[i]); + IEnableLight(i, spanLights[i]); } onLights.emplace_back(spanLights[i]); } @@ -2384,7 +2458,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef for (; i > 0 && span->GetLightStrength(i, proj) < overHold; i--) { scale = (overHold - span->GetLightStrength(i, proj)) / (overHold - threshhold); - IScaleLight(mRef, i, (1 - scale) * span->GetLightScale(i, proj)); + IScaleLight(i, (1 - scale) * span->GetLightScale(i, proj)); } startScale = i + 1; } @@ -2392,7 +2466,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef /// Make sure those lights that aren't scaled....aren't for (i = 0; i < startScale; i++) { - IScaleLight(mRef, i, span->GetLightScale(i, proj)); + IScaleLight(i, span->GetLightScale(i, proj)); } } @@ -2412,11 +2486,11 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef } for (; i < numLights; i++) { - IDisableLight(mRef, i); + IDisableLight(i); } } -void plMetalPipeline::IEnableLight(plMetalMaterialShaderRef* mRef, size_t i, plLightInfo* light) +void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) { hsColorRGBA amb = light->GetAmbient(); fCurrentRenderPassUniforms->lampSources[i].ambient = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; @@ -2466,11 +2540,11 @@ void plMetalPipeline::IEnableLight(plMetalMaterialShaderRef* mRef, size_t i, plL } } else { - IDisableLight(mRef, i); + IDisableLight(i); } } -void plMetalPipeline::IDisableLight(plMetalMaterialShaderRef* mRef, size_t i) +void plMetalPipeline::IDisableLight(size_t i) { fCurrentRenderPassUniforms->lampSources[i].position = { 0.0f, 0.0f, 0.0f, 0.0f }; fCurrentRenderPassUniforms->lampSources[i].ambient = { 0.0f, 0.0f, 0.0f, 0.0f }; @@ -2482,7 +2556,7 @@ void plMetalPipeline::IDisableLight(plMetalMaterialShaderRef* mRef, size_t i) fCurrentRenderPassUniforms->lampSources[i].scale = { 0.0f }; } -void 
plMetalPipeline::IScaleLight(plMetalMaterialShaderRef* mRef, size_t i, float scale) +void plMetalPipeline::IScaleLight(size_t i, float scale) { scale = int(scale * 1.e1f) * 1.e-1f; fCurrentRenderPassUniforms->lampSources[i].scale = scale; @@ -4063,7 +4137,7 @@ void plMetalPipeline::IDisableLightsForShadow() int i; for( i = 0; i < 8; i++ ) { - IDisableLight(nullptr, i); + IDisableLight(i); } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index f3d6e0b2e7..04b6cabf96 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -170,9 +170,9 @@ class plMetalPipeline : public pl3DPipeline void FindFragFunction(); void ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj = false); - void IEnableLight(plMetalMaterialShaderRef* mRef, size_t i, plLightInfo* light); - void IDisableLight(plMetalMaterialShaderRef* mRef, size_t i); - void IScaleLight(plMetalMaterialShaderRef* mRef, size_t i, float scale); + void IEnableLight(size_t i, plLightInfo* light); + void IDisableLight(size_t i); + void IScaleLight(size_t i, float scale); void ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan); void IHandleBlendMode(hsGMatState flags); void IHandleZMode(hsGMatState flags); @@ -234,6 +234,8 @@ class plMetalPipeline : public pl3DPipeline void IDisableLightsForShadow(); void IReleaseRenderTargetPools(); void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); + void IRenderProjections(const plRenderPrimFunc& render, const plMetalVertexBufferRef* vRef); + void IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef); void ISetLayer( uint32_t lay ); From 006c91d1898eae958e2058479e80452bd2db6066 Mon Sep 17 
00:00:00 2001 From: Colin Cornaby Date: Wed, 27 Jul 2022 22:35:31 -0700 Subject: [PATCH 088/165] Initial version of private Metal textures MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Textures can now be stored in private storage in Metal - which means on discrete cards they can be VRAM only. This feature is disabled on Apple Silicon - but perhaps should be enabled. Even though Apple Silicon doesn’t have VRAM, the GPU might be able to store a private texture in a better storage format. --- .../pfMetalPipeline/plMetalDevice.cpp | 68 +++++++++++++++---- .../pfMetalPipeline/plMetalDevice.h | 5 ++ 2 files changed, 60 insertions(+), 13 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 93f1a0af5b..0b458fcebd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -40,6 +40,9 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com *==LICENSE==*/ +#ifndef plMetalDevice_hpp +#define plMetalDevice_hpp + //We need to define these once for Metal somewhere in a cpp file #define NS_PRIVATE_IMPLEMENTATION #define CA_PRIVATE_IMPLEMENTATION @@ -344,7 +347,6 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) blitEncoder->synchronizeResource(fCurrentFragmentOutputTexture); blitEncoder->endEncoding(); } - fCurrentOffscreenCommandBuffer->enqueue(); fCurrentOffscreenCommandBuffer->commit(); if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) { //if it's an offscreen buffer, wait for completion @@ -399,7 +401,9 @@ plMetalDevice::plMetalDevice() fCurrentFragmentMSAAOutputTexture(nullptr), fCurrentUnprocessedOutputTexture(nullptr), fGammaLUTTexture(nullptr), - fGammaAdjustState(nullptr) + fGammaAdjustState(nullptr), + fBlitCommandBuffer(nullptr), + fBlitCommandEncoder(nullptr) { fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; @@ -873,17 +877,31 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* im bool supportsMipMap = tRef->fLevels && textureIsValid; MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), img->GetHeight(), supportsMipMap); descriptor->setUsage(MTL::TextureUsageShaderRead); - //if device has unified memory, set storage mode to shared - if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { - descriptor->setStorageMode(MTL::StorageModeShared); - } - //Metal gets mad if we set this with 0, only set it if we know there are mipmaps - if(supportsMipMap) { - descriptor->setMipmapLevelCount(tRef->fLevels + 1); - } - tRef->fTexture = fMetalDevice->newTexture(descriptor); - //} + + //Metal gets mad if we set this with 0, only set it if we know there are mipmaps + if(supportsMipMap) { + descriptor->setMipmapLevelCount(tRef->fLevels + 1); + } + + //if device has unified memory, set storage mode to 
shared + if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { + descriptor->setStorageMode(MTL::StorageModeShared); + } else { + descriptor->setStorageMode(MTL::StorageModeManaged); + } + + + tRef->fTexture = fMetalDevice->newTexture(descriptor); PopulateTexture( tRef, img, 0); + if(!fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { + descriptor->setStorageMode(MTL::StorageModePrivate); + MTL::Texture* privateTexture = fMetalDevice->newTexture(descriptor); + BlitTexture(tRef->fTexture, privateTexture); + tRef->fTexture->autorelease(); + tRef->fTexture = privateTexture; + } + //} + tRef->SetDirty(false); } @@ -1152,6 +1170,17 @@ MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() void plMetalDevice::SubmitCommandBuffer() { + if (fBlitCommandEncoder) { + fBlitCommandEncoder->endEncoding(); + fBlitCommandBuffer->commit(); + + fBlitCommandBuffer->release(); + fBlitCommandEncoder->release(); + + fBlitCommandBuffer = nullptr; + fBlitCommandEncoder = nullptr; + } + fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; @@ -1161,7 +1190,6 @@ void plMetalDevice::SubmitCommandBuffer() } fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); - fCurrentCommandBuffer->enqueue(); fCurrentCommandBuffer->commit(); //as we more tightly manage resource sync we may be able to avoid waiting for the frame to complete //fCurrentCommandBuffer->waitUntilCompleted(); @@ -1259,3 +1287,17 @@ CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() { return fCurrentDrawable; } + +void plMetalDevice::BlitTexture(MTL::Texture* src, MTL::Texture* dst) +{ + if (fBlitCommandEncoder == nullptr) { + fBlitCommandBuffer = fCommandQueue->commandBuffer()->retain(); + //enqueue so we go to the front of the line before render + fBlitCommandBuffer->enqueue(); + fBlitCommandEncoder = fBlitCommandBuffer->blitCommandEncoder()->retain(); + } + + fBlitCommandEncoder->copyFromTexture(src, dst); +} + +#endif 
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 0d20096eef..73c654fa83 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -181,6 +181,8 @@ class plMetalDevice } } + void BlitTexture(MTL::Texture* src, MTL::Texture* dst); + void EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma); private: @@ -236,6 +238,9 @@ class plMetalDevice plRenderTarget* fCurrentRenderTarget; MTL::SamplerState* fSamplerStates[4]; + MTL::CommandBuffer* fBlitCommandBuffer; + MTL::BlitCommandEncoder* fBlitCommandEncoder; + bool NeedsPostprocessing() { return fGammaLUTTexture != nullptr; } From 45aa3f295fe80019bfac1c28faf6a693531b5cb0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 29 Jul 2022 21:50:15 -0700 Subject: [PATCH 089/165] Fixing copy allocator for blur Description is a string, not the texture description. 
--- .../pfMetalPipeline/plMetalDevicePerformanceShaders.mm | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm index 0efe644dba..92b6b0c1a7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -61,6 +61,8 @@ fBlurShaders[sigma] = (NS::Object*)blur; } [blur encodeToCommandBuffer:(id)commandBuffer inPlaceTexture:(id*)&texture fallbackCopyAllocator:^ id (MPSKernel * kernel, id commandBuffer, id texture) { - return (id)fMetalDevice->newTexture((MTL::TextureDescriptor*)texture.description); + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor((MTL::PixelFormat)texture.pixelFormat, texture.width, texture.height, false); + descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + return (id)fMetalDevice->newTexture(descriptor); }]; } From 99ff52039eba7e1037074621b5ab935c46a19667 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 29 Jul 2022 21:51:18 -0700 Subject: [PATCH 090/165] Removing old fPipelineState This is a leftover from the initial Metal prototype --- .../pfMetalPipeline/plMetalPipeline.cpp | 22 +------------------ 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 5c87ce6a2d..236c33e08b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -157,7 +157,7 @@ bool plRenderTriListFunc::RenderPrims() const -plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), 
fPipelineState(nullptr), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr), fCurrRenderLayer() +plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr), fCurrRenderLayer() { fTextureRefList = nullptr; fVtxBuffRefList = nullptr; @@ -168,26 +168,6 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons fCurrLayerIdx = 0; fDevice.fPipeline = this; - //Compile the shaders and link our pipeline - MTL::Library *library = fDevice.fMetalDevice->newDefaultLibrary(); - MTL::Function *fragFunction = library->newFunction( - NS::String::string("fragmentShader", NS::ASCIIStringEncoding) - ); - MTL::Function *vertFunction = library->newFunction( - NS::String::string("plateVertexShader", NS::ASCIIStringEncoding) - ); - MTL::RenderPipelineDescriptor *descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); - descriptor->setFragmentFunction(fragFunction); - descriptor->setVertexFunction(vertFunction); - descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); - - NS::Error *error; - fPipelineState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error); - library->release(); - fragFunction->release(); - vertFunction->release(); - descriptor->release(); - fMaxLayersAtOnce = 8; // Alloc half our simultaneous textures to piggybacks. 
From 6fa656598d2463c3278af1049efff0bf4514182d Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 31 Jul 2022 12:33:23 -0700 Subject: [PATCH 091/165] Adding missing MPS link --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index e361b6e9c8..65b9332ca5 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -185,6 +185,11 @@ if(APPLE) set_source_files_properties(Mac-Cocoa/Assets.xcassets ${RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) + target_link_libraries(plClient PRIVATE "-framework MetalPerformanceShaders") + install( + TARGETS plClient + DESTINATION client + ) if(PLASMA_APPLE_DEVELOPMENT_TEAM_ID) set_target_properties(plClient PROPERTIES From eb28191667442bb1d667fddbf8a96a279aa51226 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 31 Jul 2022 12:45:15 -0700 Subject: [PATCH 092/165] Removing ranges import MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ranges is a C++20 feature I didn’t end up using anyway --- .../FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 7a9ee75ed2..3cdaf4dbdd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -61,8 +61,6 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plMetalDevice.h" #include "plMetalPipeline.h" -#include - plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline *pipe) : fPipeline { pipe }, fMaterial { mat }, From 406fce4be4f5fe9c1dcd3b2bd89f3a215367f9db Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 24 Sep 2022 22:27:42 -0700 Subject: [PATCH 093/165] Adding deep color support Also forcing plMetalTextFont and plates back through the devices pipeline state management to match the current color space --- .../pfConsole/pfConsoleCommands.cpp | 4 + .../ShaderSrc/GammaCorrection.metal | 2 +- .../pfMetalPipeline/plMetalDevice.cpp | 29 ++++++- .../pfMetalPipeline/plMetalDevice.h | 7 ++ .../pfMetalPipeline/plMetalPipeline.cpp | 40 +++++++-- .../pfMetalPipeline/plMetalPipeline.h | 8 +- .../pfMetalPipeline/plMetalPipelineState.h | 2 +- .../pfMetalPipeline/plMetalPlateManager.cpp | 85 ++++++++++--------- .../pfMetalPipeline/plMetalPlateManager.h | 24 +++++- .../pfMetalPipeline/plMetalTextFont.cpp | 65 +++++++++----- .../pfMetalPipeline/plMetalTextFont.h | 26 +++++- 11 files changed, 212 insertions(+), 80 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp b/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp index 539400e173..f43f926537 100644 --- a/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp +++ b/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp @@ -1341,6 +1341,10 @@ PF_CONSOLE_CMD( Graphics_Renderer, Gamma2, "float g", "Set gamma value (alternat float sinT = std::sin(t * hsConstants::pi / 2.f); float remap = std::clamp(t + (sinT - t) * g, 0.f, 1.f); + if( remap < 0 ) + remap = 0; + else if( remap > 1.f ) + remap = 1.f; ramp[i] = uint16_t(remap * float(uint16_t(-1)) + 0.5f); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal index 884b7a1fc1..16f6d8ae92 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal @@ -66,7 +66,7 @@ vertex GammaVertexOut gammaCorrectVertex(constant GammaVertexIn *in [[ buffer(0) } const constant sampler lutSampler = sampler( - filter::linear + filter::nearest ); fragment half4 gammaCorrectFragment( diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 0b458fcebd..97cd94fab4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -441,8 +441,6 @@ plMetalDevice::plMetalDevice() fReverseZStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); depthDescriptor->release(); - - CreateGammaAdjustState(); } void plMetalDevice::SetViewport() { @@ -750,6 +748,25 @@ void plMetalDevice::SetupTextureRef(plBitmap *img, plMetalDevice::TextureRef *tR hsRefCnt_SafeUnRef(tRef); } +void plMetalDevice::ReleaseFramebufferObjects() +{ + if (fCurrentUnprocessedOutputTexture) + fCurrentUnprocessedOutputTexture->release(); + fCurrentFragmentOutputTexture = nil; + + if (fGammaAdjustState) + fGammaAdjustState->release(); + fGammaAdjustState = nil; +} + +void plMetalDevice::SetFramebufferFormat(MTL::PixelFormat format) +{ + if (fFramebufferFormat != format) { + ReleaseFramebufferObjects(); + fFramebufferFormat = format; + } +} + void plMetalDevice::CheckTexture(plMetalDevice::TextureRef *tRef) { if (!tRef->fTexture) @@ -964,6 +981,8 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) fCurrentCommandBuffer = fCommandQueue->commandBuffer(); fCurrentCommandBuffer->retain(); + SetFramebufferFormat(drawable->texture()->pixelFormat()); + bool depthNeedsRebuild = fCurrentDrawableDepthTexture == nullptr; depthNeedsRebuild |= drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || 
drawable->texture()->height() != fCurrentDrawableDepthTexture->height(); @@ -1216,7 +1235,7 @@ void plMetalDevice::CreateGammaAdjustState() { library->release(); - gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(fFramebufferFormat); NS::Error *error; fGammaAdjustState->release(); @@ -1225,6 +1244,10 @@ void plMetalDevice::CreateGammaAdjustState() { void plMetalDevice::PostprocessIntoDrawable() { + if (!fGammaAdjustState) { + CreateGammaAdjustState(); + } + //Gamma adjust MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionDontCare); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 73c654fa83..776e999e33 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -185,6 +185,8 @@ class plMetalDevice void EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma); + MTL::PixelFormat GetFramebufferFormat() { return fFramebufferFormat; }; + private: struct plMetalPipelineRecord { @@ -214,7 +216,11 @@ class plMetalDevice MTL::Texture* fGammaLUTTexture; + void SetFramebufferFormat(MTL::PixelFormat format); + private: + MTL::PixelFormat fFramebufferFormat; + //these are internal bits for backing the current render pass //private because the functions should be used to keep a consistant //render pass state @@ -250,6 +256,7 @@ class plMetalDevice void BeginNewRenderPass(); void ReleaseSamplerStates(); + void ReleaseFramebufferObjects(); //Blur states std::unordered_map fBlurShaders; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 236c33e08b..f112f28cf1 100644 
--- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -170,6 +170,10 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons fMaxLayersAtOnce = 8; + // Default our output format to 8 bit BGRA. Client may immediately change this to + // the actual framebuffer format. + SetFramebufferFormat(MTL::PixelFormatBGRA8Unorm); + // Alloc half our simultaneous textures to piggybacks. // Won't hurt us unless we try to many things at once. fMaxPiggyBacks = fMaxLayersAtOnce >> 1; @@ -263,7 +267,7 @@ plTextFont *plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) { plTextFont *font; - font = new plMetalTextFont( this, fDevice.fMetalDevice ); + font = new plMetalTextFont( this, &fDevice ); if (font == nullptr) return nullptr; font->Create( face, size ); @@ -878,6 +882,29 @@ bool plMetalPipeline::SetGamma(const uint16_t *const tabR, const uint16_t *const return true; } +bool plMetalPipeline::SetGamma10(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t *const tabB) +{ + //allocate a new buffer every time so we don't cause problems with a running render pass + if(fDevice.fGammaLUTTexture) { + fDevice.fGammaLUTTexture->release(); + fDevice.fGammaLUTTexture = nullptr; + } + + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); + texDescriptor->setTextureType(MTL::TextureType1DArray); + texDescriptor->setWidth(1024); + texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); + texDescriptor->setArrayLength(3); + + fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); + + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 0, tabR, 1024 * sizeof(uint16_t), 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 1, tabG, 1024 * sizeof(uint16_t), 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 2, tabB, 1024 * sizeof(uint16_t), 0); + + 
return true; +} + bool plMetalPipeline::CaptureScreen(plMipmap *dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight) { //FIXME: Screen capture @@ -2586,9 +2613,12 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; - if(fState.fCurrentPipelineState != pm->fPlateRenderPipelineState) { - fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pm->fPlateRenderPipelineState); - fState.fCurrentPipelineState = pm->fPlateRenderPipelineState; + plMetalPlatePipelineState state(&fDevice); + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = state.GetRenderPipelineState(); + + if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { + fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); + fState.fCurrentPipelineState = linkedPipeline->pipelineState; } float alpha = material->GetLayer(0)->GetOpacity(); fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&alpha, sizeof(float), 6); @@ -2618,7 +2648,7 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), BufferIndexState); - pm->encodeVertexBuffer(fDevice.CurrentRenderCommandEncoder()); + pm->EncodeDraw(fDevice.CurrentRenderCommandEncoder()); IPopPiggyBacks(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 04b6cabf96..7755f0f88d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -132,6 +132,8 @@ class plMetalPipeline : public pl3DPipeline void LoadResources() override; bool SetGamma(float eR, float eG, float eB) override; bool SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) override; + bool SetGamma10(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t 
*const tabB) override; + bool Supports10BitGamma() const override { return true; }; bool CaptureScreen(plMipmap* dest, bool flipVertical = false, uint16_t desiredWidth = 0, uint16_t desiredHeight = 0) override; plMipmap* ExtractMipMap(plRenderTarget* targ) override; void GetSupportedDisplayModes(std::vector *res, int ColorDepth = 32 ) override; @@ -163,8 +165,12 @@ class plMetalPipeline : public pl3DPipeline uint32_t IGetBufferFormatSize( uint8_t format ) const; plRenderTarget* PopRenderTarget() override; + + MTL::PixelFormat GetFramebufferFormat() { return fDevice.GetFramebufferFormat(); }; + void SetFramebufferFormat(MTL::PixelFormat format) { fDevice.SetFramebufferFormat(format); }; + private: - MTL::RenderPipelineState* fPipelineState; + VertexUniforms* fCurrentRenderPassUniforms; void FindFragFunction(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index f449c2a4d5..35ba34869c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -71,7 +71,7 @@ class plMetalPipelineState { virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) = 0; virtual const NS::String* GetDescription() = 0; - virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) = 0; protected: plMetalDevice* fDevice; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 14cc510547..16cc251dd7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -49,52 +49,12 @@ 
plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) : plPlateManager(pipe), fVtxBuffer(0) { - //Compile the shaders and link our pipeline for plates - MTL::Library *library = pipe->fDevice.fMetalDevice->newDefaultLibrary(); - MTL::Function *fragFunction = library->newFunction( - NS::String::string("fragmentShader", NS::ASCIIStringEncoding) - ); - MTL::Function *vertFunction = library->newFunction( - NS::String::string("plateVertexShader", NS::ASCIIStringEncoding) - ); - MTL::RenderPipelineDescriptor *descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); - descriptor->setFragmentFunction(fragFunction); - descriptor->setVertexFunction(vertFunction); - descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); - descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - descriptor->setDepthAttachmentPixelFormat(MTL::PixelFormatDepth32Float_Stencil8); - descriptor->setSampleCount(pipe->fDevice.fSampleCount); - - //create the descriptor of the vertex array - MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); - vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); - vertexDescriptor->attributes()->object(0)->setBufferIndex(VertexAttributePosition); - vertexDescriptor->attributes()->object(0)->setOffset(0); - vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); - vertexDescriptor->attributes()->object(1)->setBufferIndex(VertexAttributeTexcoord); - vertexDescriptor->attributes()->object(1)->setOffset(0); - - 
vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 2); - vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 2); - - descriptor->setVertexDescriptor(vertexDescriptor); MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); depthDescriptor->setDepthWriteEnabled(false); fDepthState = pipe->fDevice.fMetalDevice->newDepthStencilState(depthDescriptor); depthDescriptor->release(); - - NS::Error *error; - fPlateRenderPipelineState = pipe->fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error); - library->release(); - fragFunction->release(); - vertFunction->release(); - descriptor->release(); } void plMetalPlateManager::ICreateGeometry() @@ -122,7 +82,7 @@ void plMetalPlateManager::ICreateGeometry() } } -void plMetalPlateManager::encodeVertexBuffer(MTL::RenderCommandEncoder *encoder) { +void plMetalPlateManager::EncodeDraw(MTL::RenderCommandEncoder *encoder) { encoder->setVertexBuffer(fVtxBuffer, 0, VertexAttributePosition); encoder->setVertexBuffer(fVtxBuffer, offsetof(plateVertexBuffer, uv), VertexAttributeTexcoord); @@ -154,3 +114,46 @@ plMetalPlateManager::~plMetalPlateManager() IReleaseGeometry(); } + + +bool plMetalPlatePipelineState::IsEqual(const plMetalPipelineState &p) const { + return true; +} + +plMetalPipelineState *plMetalPlatePipelineState::Clone() { + return new plMetalPlatePipelineState(fDevice); +} + +const MTL::Function *plMetalPlatePipelineState::GetVertexFunction(MTL::Library *library) { + return library->newFunction(NS::MakeConstantString("plateVertexShader")); +} + +const MTL::Function *plMetalPlatePipelineState::GetFragmentFunction(MTL::Library *library) { + return library->newFunction(NS::MakeConstantString("fragmentShader")); +} + +const NS::String *plMetalPlatePipelineState::GetDescription() { + return NS::MakeConstantString("Plate Pipeline State"); +} + +void 
plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { + descriptor->setBlendingEnabled(true); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); +} + +void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) { + vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(0)->setBufferIndex(VertexAttributePosition); + vertexDescriptor->attributes()->object(0)->setOffset(0); + vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); + vertexDescriptor->attributes()->object(1)->setBufferIndex(VertexAttributeTexcoord); + vertexDescriptor->attributes()->object(1)->setOffset(0); + + vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 2); + vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 2); +} + +void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const { + +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index ffd956df22..dc74468cee 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -48,8 +48,29 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include #include #include "hsPoint2.h" +#include "plMetalPipelineState.h" class plMetalPipeline; +class plMetalDevice; + +class plMetalPlatePipelineState : public plMetalPipelineState +{ +public: + plMetalPlatePipelineState(plMetalDevice* device): plMetalPipelineState(device) { }; + virtual bool IsEqual(const plMetalPipelineState &p) const override; + virtual uint16_t GetID() const override { return 5; }; + virtual plMetalPipelineState* Clone() override; + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override; + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override; + virtual const NS::String* GetDescription() override; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; + + void ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) override; + + void GetFunctionConstants(MTL::FunctionConstantValues *) const override; + +}; class plMetalPlateManager : public plPlateManager { @@ -59,8 +80,7 @@ class plMetalPlateManager : public plPlateManager void IDrawToDevice(plPipeline *pipe) override; void ICreateGeometry(); void IReleaseGeometry(); - MTL::RenderPipelineState *fPlateRenderPipelineState; - void encodeVertexBuffer(MTL::RenderCommandEncoder *encoder); + void EncodeDraw(MTL::RenderCommandEncoder *encoder); ~plMetalPlateManager(); private: struct plateVertexBuffer { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp index cccd9f82f3..7c899419ba 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp @@ -67,11 +67,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com const uint32_t kNumVertsInBuffer(4608); uint32_t plMetalTextFont::fBufferCursor = 0; -MTL::RenderPipelineState* plMetalTextFont::fRenderState = nullptr; //// Constructor & Destructor ///////////////////////////////////////////////// -plMetalTextFont::plMetalTextFont( plPipeline *pipe, MTL::Device *device ) : plTextFont( pipe ), fTexture() +plMetalTextFont::plMetalTextFont( plPipeline *pipe, plMetalDevice* device ) : plTextFont( pipe ), fTexture() { fDevice = device; fPipeline = (plMetalPipeline *)pipe; @@ -92,7 +91,7 @@ void plMetalTextFont::ICreateTexture( uint16_t *data ) MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Unorm, fTextureWidth, fTextureHeight, false); fTexture->release(); - fTexture = fDevice->newTexture(descriptor); + fTexture = fDevice->fMetalDevice->newTexture(descriptor); fTexture->setLabel(NS::MakeConstantString("Font texture")); struct InDataValues { @@ -145,24 +144,6 @@ void plMetalTextFont::ICreateTexture( uint16_t *data ) void plMetalTextFont::CreateShared(plMetalDevice* device) { - MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init()->autorelease(); - MTL::Library* library = device->fMetalDevice->newDefaultLibrary(); - - MTL::Function* vertFunction = library->newFunction(NS::MakeConstantString("textFontVertexShader")); - MTL::Function* fragFunction = library->newFunction(NS::MakeConstantString("textFontFragmentShader")); - - descriptor->setVertexFunction(vertFunction); - descriptor->setFragmentFunction(fragFunction); - descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); - descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); - descriptor->setSampleCount(device->fSampleCount); - descriptor->setDepthAttachmentPixelFormat(MTL::PixelFormatDepth32Float_Stencil8); - - descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); - 
descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - - NS::Error* error; - fRenderState = device->fMetalDevice->newRenderPipelineState(descriptor, &error); } void plMetalTextFont::ReleaseShared(MTL::Device* device) @@ -233,7 +214,9 @@ void plMetalTextFont::IDrawPrimitive( uint32_t count, plFontVertex *array ) { plFontVertex *v; - fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(fRenderState); + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); + + fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); const uint maxCount = 4096/(sizeof(plFontVertex) * 3); uint drawm = 0; while(count > 0) { @@ -289,8 +272,9 @@ void plMetalTextFont::IDrawPrimitive( uint32_t count, plFontVertex *array ) void plMetalTextFont::IDrawLines( uint32_t count, plFontVertex *array ) { + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); - fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(fRenderState); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array, count * 2 * sizeof( plFontVertex ), 0); matrix_float4x4 mat = matrix_identity_float4x4; @@ -378,3 +362,38 @@ void plMetalTextFont::RestoreStates() fDevice->SetTransform( D3DTS_TEXTURE0, &d3dIdentityMatrix );*/ } + + +bool plMetalTextFontPipelineState::IsEqual(const plMetalPipelineState &p) const { + return true; +} + +plMetalPipelineState *plMetalTextFontPipelineState::Clone() { + return new plMetalTextFontPipelineState(fDevice); +} + +const MTL::Function *plMetalTextFontPipelineState::GetVertexFunction(MTL::Library *library) { + return library->newFunction(NS::MakeConstantString("textFontVertexShader")); +} + +const MTL::Function 
*plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library *library) { + return library->newFunction(NS::MakeConstantString("textFontFragmentShader")); +} + +const NS::String *plMetalTextFontPipelineState::GetDescription() { + return NS::MakeConstantString("Font Rendering"); +} + +void plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { + + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); +} + +void plMetalTextFontPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) { + return; +} + +void plMetalTextFontPipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const { + return; +} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h index 17e6a57858..35fa5dbcf8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -44,18 +44,38 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plPipeline/plTextFont.h" #include "plMetalPipeline.h" +#include "plMetalPipelineState.h" #include //// plDXTextFont Class Definition /////////////////////////////////////////// class plPipeline; +class plMetalDevice; + +class plMetalTextFontPipelineState : public plMetalPipelineState +{ +public: + plMetalTextFontPipelineState(plMetalDevice* device): plMetalPipelineState(device) { }; + virtual bool IsEqual(const plMetalPipelineState &p) const override; + virtual uint16_t GetID() const override { return 6; }; + virtual plMetalPipelineState* Clone() override; + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override; + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override; + virtual const NS::String* GetDescription() override; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; + + void ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) override; + + void GetFunctionConstants(MTL::FunctionConstantValues *) const override; + +}; class plMetalTextFont : public plTextFont { protected: static uint32_t fBufferCursor; - static MTL::RenderPipelineState* fRenderState; void ICreateTexture(uint16_t *data) override; void IInitStateBlocks() override; @@ -63,12 +83,12 @@ class plMetalTextFont : public plTextFont void IDrawLines(uint32_t count, plFontVertex *array) override; MTL::Texture* fTexture; - MTL::Device* fDevice; + plMetalDevice* fDevice; plMetalPipeline* fPipeline; public: - plMetalTextFont( plPipeline *pipe, MTL::Device *device ); + plMetalTextFont( plPipeline *pipe, plMetalDevice *device ); ~plMetalTextFont(); static void CreateShared(plMetalDevice* device); From 7a8f9d373f3995d4a1a76cd0ac7c770b600b7328 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 4 Aug 2022 18:18:46 -0700 Subject: [PATCH 094/165] Removing Poisson Disk Shadow Treatment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 
8bit Shadow blurring would be the best improvement for now (along with upgrading shadow resolution.) This could come back in the future - but for now it’s not really helping much. --- .../ShaderSrc/FixedPipelineShaders.metal | 43 +------------------ 1 file changed, 1 insertion(+), 42 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index f6a3e956a2..96c41acf0c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -678,42 +678,6 @@ fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]]) return half4(1.0h, 1.0h, 1.0h, currentAlpha); } - - - constant float2 poissonDisk[16] = { - float2( -0.94201624, -0.39906216 ), - float2( 0.94558609, -0.76890725 ), - float2( -0.094184101, -0.92938870 ), - float2( 0.34495938, 0.29387760 ), - float2( -0.91588581, 0.45771432 ), - float2( -0.81544232, -0.87912464 ), - float2( -0.38277543, 0.27676845 ), - float2( 0.97484398, 0.75648379 ), - float2( 0.44323325, -0.97511554 ), - float2( 0.53742981, -0.47373420 ), - float2( -0.26496911, -0.41893023 ), - float2( 0.79197514, 0.19090188 ), - float2( -0.24188840, 0.99706507 ), - float2( -0.81409955, 0.91437590 ), - float2( 0.19984126, 0.78641367 ), - float2( 0.14383161, -0.14100790 ) - }; - - - const float rand(float3 co){ - //since opengl es only garantees that mediump will be 10 bits, we need to try and - //keep the numbers low. 
The actual constants are mostly arbilitary chosen with the - //goal to give different weightings to the first or seccond element - - float3 product = float3( sin( dot(co, float3(0.129898,0.78233, 0.129898))), - sin( dot(co, float3(0.689898,0.23233, 0.689898))), - sin( dot(co, float3(0.434198,0.51833, 0.434198))) ); - - - float3 weighting = float3(4.37585453723, 2.465973, 3.18438); - - return fract(dot(weighting, product)); - } fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], texture2d texture [[ texture(16) ]], @@ -729,12 +693,7 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], min_filter::linear, address::clamp_to_edge); - half4 currentColor = 0.0; - for (int i=0;i<4;i++){ - int index = int(16.0*rand(floor(in.position.xyz*1000.0) + i))%16; - currentColor += 0.25 * texture.sample(colorSample, sampleCoords.xy + poissonDisk[index]/700.0); - } - //half4 currentColor = texture.sample(colorSample, sampleCoords.xy); + half4 currentColor = texture.sample(colorSample, sampleCoords.xy); currentColor.rgb *= in.vtxColor.rgb; const float2 LUTCoords = in.texCoord2.xy; From 50d03cac2e1303d7b3ded9da230ee762a8456ec6 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 4 Aug 2022 18:23:36 -0700 Subject: [PATCH 095/165] Enabling staging textures on all platforms All textures now get blitted to private memory. 
--- .../pfMetalPipeline/plMetalDevice.cpp | 18 +----------------- 1 file changed, 1 insertion(+), 17 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 97cd94fab4..03647d1cce 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -900,23 +900,11 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* im descriptor->setMipmapLevelCount(tRef->fLevels + 1); } - //if device has unified memory, set storage mode to shared - if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { - descriptor->setStorageMode(MTL::StorageModeShared); - } else { - descriptor->setStorageMode(MTL::StorageModeManaged); - } + descriptor->setStorageMode(MTL::StorageModeManaged); tRef->fTexture = fMetalDevice->newTexture(descriptor); PopulateTexture( tRef, img, 0); - if(!fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { - descriptor->setStorageMode(MTL::StorageModePrivate); - MTL::Texture* privateTexture = fMetalDevice->newTexture(descriptor); - BlitTexture(tRef->fTexture, privateTexture); - tRef->fTexture->autorelease(); - tRef->fTexture = privateTexture; - } //} @@ -931,10 +919,6 @@ void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plCubic descriptor->setMipmapLevelCount(tRef->fLevels + 1); } descriptor->setUsage(MTL::TextureUsageShaderRead); - //if device has unified memory, set storage mode to shared - if(fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { - descriptor->setStorageMode(MTL::StorageModeShared); - } tRef->fTexture = fMetalDevice->newTexture(descriptor); From 56b11e63eb9c8e69030d5d55915861d23fa36a84 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 4 Aug 2022 18:56:57 -0700 Subject: [PATCH 096/165] Re-factoring how lights are encoded Lights can now go into a variable length buffer - trying to reduce memory traffic. 
Ideally the pipeline should probably hold these as a vector, but this is a start. --- .../ShaderSrc/FixedPipelineShaders.metal | 49 +++---- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 14 +- .../pfMetalPipeline/plMetalPipeline.cpp | 132 +++++++++--------- .../pfMetalPipeline/plMetalPipeline.h | 3 +- 4 files changed, 98 insertions(+), 100 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 96c41acf0c..0164de7bf2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -237,24 +237,25 @@ typedef struct vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + constant plMetalLights & lights [[ buffer(BufferIndexLights) ]], constant float4x4 & blendMatrix1 [[ buffer(BufferIndexBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) { ColorInOut out; //we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. 
const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.0f); - const half4 MAmbient = half4(mix(inColor, uniforms.ambientCol, uniforms.ambientSrc)); - const half4 MDiffuse = half4(mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc)); - const half4 MEmissive = half4(mix(inColor, uniforms.emissiveCol, uniforms.emissiveSrc)); + const half3 MAmbient = mix(inColor.rgb, uniforms.ambientCol, uniforms.ambientSrc); + const half4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); + const half3 MEmissive = mix(inColor.rgb, uniforms.emissiveCol, uniforms.emissiveSrc); //const half4 MSpecular = half4(mix(inColor, uniforms.specularCol, uniforms.specularSrc)); - half4 LAmbient = half4(0.0, 0.0, 0.0, 0.0); - half4 LDiffuse = half4(0.0, 0.0, 0.0, 0.0); + half3 LAmbient = half3(0.0, 0.0, 0.0); + half3 LDiffuse = half3(0.0, 0.0, 0.0); - float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz; + const float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz; - for (size_t i = 0; i < 8; i++) { - constant plMetalShaderLightSource *lightSource = &uniforms.lampSources[i]; + for (size_t i = 0; i < lights.count; i++) { + constant const plMetalShaderLightSource *lightSource = &lights.lampSources[i]; if(lightSource->scale == 0.0h) continue; @@ -273,7 +274,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], attenuation = 1.0 / (lightSource->constAtten + lightSource->linAtten * distance + lightSource->quadAtten * pow(distance, 2.0)); - if (uniforms.lampSources[i].spotProps.x > 0.0) { + if (lightSource->spotProps.x > 0.0) { // Spot Light with cone falloff const float a = dot(direction.xyz, normalize(-lightSource->direction).xyz); const float theta = lightSource->spotProps.y; @@ -284,14 +285,15 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], } } - LAmbient.rgb = LAmbient.rgb + half3(attenuation * (uniforms.lampSources[i].ambient.rgb * 
uniforms.lampSources[i].scale)); + LAmbient.rgb = LAmbient.rgb + half3(attenuation * (lightSource->ambient.rgb * lightSource->scale)); float3 dotResult = dot(Ndirection, direction); - LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (uniforms.lampSources[i].diffuse.rgb * uniforms.lampSources[i].scale) * half3(max(0.0, dotResult) * attenuation); + LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (lightSource->diffuse.rgb * lightSource->scale) * half3(max(0.0, dotResult) * attenuation); } - const half4 ambient = clamp((MAmbient) * (half4(uniforms.globalAmb) + LAmbient), 0.0, 1.0); - const half4 diffuse = clamp(LDiffuse, 0.0, 1.0); - const half4 material = clamp(ambient + diffuse + half4(MEmissive), 0.0, 1.0); + const half3 ambient = clamp((MAmbient.rgb) * (uniforms.globalAmb.rgb + LAmbient.rgb), 0.0, 1.0); + const half3 diffuse = clamp(LDiffuse.rgb, 0.0, 1.0); + const half4 material = half4(clamp(ambient + diffuse + MEmissive.rgb, 0.0, 1.0), + abs(uniforms.invVtxAlpha - MDiffuse.a)); out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); @@ -318,22 +320,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], const float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); - if(hasLayer1) - out.texCoord1 = uniforms.sampleLocation(0, &in.texCoord1, normal, vCamPosition); - if(hasLayer2) - out.texCoord2 = uniforms.sampleLocation(1, &in.texCoord1, normal, vCamPosition); - if(hasLayer3) - out.texCoord3 = uniforms.sampleLocation(2, &in.texCoord1, normal, vCamPosition); - if(hasLayer4) - out.texCoord4 = uniforms.sampleLocation(3, &in.texCoord1, normal, vCamPosition); - if(hasLayer5) - out.texCoord5 = uniforms.sampleLocation(4, &in.texCoord1, normal, vCamPosition); - if(hasLayer6) - out.texCoord5 = uniforms.sampleLocation(5, &in.texCoord1, normal, vCamPosition); - if(hasLayer7) - out.texCoord7 = uniforms.sampleLocation(6, &in.texCoord1, normal, vCamPosition); - if(hasLayer8) - out.texCoord8 = 
uniforms.sampleLocation(7, &in.texCoord1, normal, vCamPosition); + for(size_t layer=0; layerCurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState); + + plMetalLights* lights = &fDevice->fPipeline->fLights; + size_t lightSize = offsetof(plMetalLights, lampSources) + (sizeof(plMetalShaderLightSource) * lights->count); + + fDevice->CurrentRenderCommandEncoder()->setVertexBytes(lights, sizeof(plMetalLights), BufferIndexLights); fDevice->CurrentRenderCommandEncoder()->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, fNumTris, MTL::IndexTypeUInt16, fDevice->fCurrentIndexBuffer, (sizeof(uint16_t) * fIStart)); } @@ -1290,21 +1295,22 @@ void plMetalPipeline::IRenderProjections(const plRenderPrimFunc& render, const p void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef) { // Enable the projecting light only. - IEnableLight(7, li); + IEnableLight(0, li); + fLights.count = 1; plLayerInterface* proj = li->GetProjection(); CheckTextureRef(proj); plMetalTextureRef* tex = (plMetalTextureRef*)proj->GetTexture()->GetDeviceRef(); - IScaleLight(7, true); + IScaleLight(0, true); - fCurrentRenderPassUniforms->ambientCol = half4(0.0); + fCurrentRenderPassUniforms->ambientCol = half3(0.0); fCurrentRenderPassUniforms->ambientSrc = 1.0; fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; fCurrentRenderPassUniforms->specularSrc = 1.0; fCurrentRenderPassUniforms->fogValues = {0.0, 0.0f}; - fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0}; fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; @@ -1365,23 +1371,20 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM IPushProjPiggyBack(proj); // Enable the projecting light only. 
- IEnableLight(7, li); + IEnableLight(0, li); + fLights.count = 1; AppendLayerInterface(&layLightBase, false); IHandleMaterialPass( material, iPass, &span, vRef, false ); - //FIXME: Hard setting of light - IScaleLight(7, true); + IScaleLight(0, true); // Do the render with projection. render.RenderPrims(); RemoveLayerInterface(&layLightBase, false); - // Disable the projecting light - IDisableLight(7); - // Pop it's projected texture off piggyback IPopProjPiggyBacks(); @@ -1486,7 +1489,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) IHandleMaterialPass(material, pass, &span, vRef); if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) { - fCurrentRenderPassUniforms->ambientCol = {1.0f, 1.0f, 1.0f, 1.0f}; + fCurrentRenderPassUniforms->ambientCol = {1.0f, 1.0f, 1.0f}; fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->ambientSrc = 1.0; @@ -2250,11 +2253,11 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye { fCurrentRenderPassUniforms->globalAmb = { 1.0, 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0 }; fCurrentRenderPassUniforms->diffuseCol = { 1.0, 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->specularCol = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->specularCol = { 1.0, 1.0, 1.0 }; fCurrentRenderPassUniforms->ambientSrc = 1.0; fCurrentRenderPassUniforms->diffuseSrc = 1.0; @@ -2280,31 +2283,31 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye { if (state.fShadeFlags & hsGMatState::kShadeWhite) { fCurrentRenderPassUniforms->globalAmb = { 1.0, 1.0, 1.0, 1.0 }; - 
fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0 }; } else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade)) { fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0, 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0 }; } else { hsColorRGBA amb = currLayer->GetPreshadeColor(); fCurrentRenderPassUniforms->globalAmb = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0 }; + fCurrentRenderPassUniforms->ambientCol = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b) }; } hsColorRGBA dif = currLayer->GetRuntimeColor(); fCurrentRenderPassUniforms->diffuseCol = { static_cast(dif.r), static_cast(dif.g), static_cast(dif.b), static_cast(currLayer->GetOpacity()) }; hsColorRGBA em = currLayer->GetAmbientColor(); - fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b), 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b) }; // Set specular properties if (state.fShadeFlags & hsGMatState::kShadeSpecular) { hsColorRGBA spec = currLayer->GetSpecularColor(); - fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), 1.0 }; + fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b) }; #if 0 mat.Power = currLayer->GetSpecularPower(); #endif } else { - fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0 }; } fCurrentRenderPassUniforms->diffuseSrc = 1.0; @@ -2323,11 +2326,11 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye case plSpan::kLiteVtxPreshaded: // Vtx preshaded { - 
fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0 }; fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->emissiveCol = { 0.0, 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->emissiveCol = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0 }; fCurrentRenderPassUniforms->diffuseSrc = 0.0; fCurrentRenderPassUniforms->ambientSrc = 1.0; @@ -2345,21 +2348,21 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye case plSpan::kLiteVtxNonPreshaded: // Vtx non-preshaded { - fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0 }; fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; hsColorRGBA em = currLayer->GetAmbientColor(); - fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b), 1.0 }; + fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b) }; // Set specular properties if (state.fShadeFlags & hsGMatState::kShadeSpecular) { hsColorRGBA spec = currLayer->GetSpecularColor(); - fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), 1.0 }; + fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b) }; #if 0 mat.Power = currLayer->GetSpecularPower(); #endif } else { - fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0 }; } hsColorRGBA amb = currLayer->GetPreshadeColor(); @@ -2491,6 +2494,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, 
plMetalMaterialShaderRef } onLights.clear(); } + fLights.count = i; for (; i < numLights; i++) { IDisableLight(i); @@ -2500,13 +2504,13 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) { hsColorRGBA amb = light->GetAmbient(); - fCurrentRenderPassUniforms->lampSources[i].ambient = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; + fLights.lampSources[i].ambient = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; hsColorRGBA diff = light->GetDiffuse(); - fCurrentRenderPassUniforms->lampSources[i].diffuse = { static_cast(diff.r), static_cast(diff.g), static_cast(diff.b), static_cast(diff.a) }; + fLights.lampSources[i].diffuse = { static_cast(diff.r), static_cast(diff.g), static_cast(diff.b), static_cast(diff.a) }; hsColorRGBA spec = light->GetSpecular(); - fCurrentRenderPassUniforms->lampSources[i].specular = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), static_cast(spec.a) }; + fLights.lampSources[i].specular = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), static_cast(spec.a) }; plDirectionalLightInfo* dirLight = nullptr; plOmniLightInfo* omniLight = nullptr; @@ -2515,35 +2519,35 @@ void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) if ((dirLight = plDirectionalLightInfo::ConvertNoRef(light)) != nullptr) { hsVector3 lightDir = dirLight->GetWorldDirection(); - fCurrentRenderPassUniforms->lampSources[i].position = { lightDir.fX, lightDir.fY, lightDir.fZ, 0.0 }; - fCurrentRenderPassUniforms->lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; + fLights.lampSources[i].position = { lightDir.fX, lightDir.fY, lightDir.fZ, 0.0 }; + fLights.lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; - fCurrentRenderPassUniforms->lampSources[i].constAtten = 1.0f; - fCurrentRenderPassUniforms->lampSources[i].linAtten = 0.0f; - 
fCurrentRenderPassUniforms->lampSources[i].quadAtten = 0.0f; + fLights.lampSources[i].constAtten = 1.0f; + fLights.lampSources[i].linAtten = 0.0f; + fLights.lampSources[i].quadAtten = 0.0f; } else if ((omniLight = plOmniLightInfo::ConvertNoRef(light)) != nullptr) { hsPoint3 pos = omniLight->GetWorldPosition(); - fCurrentRenderPassUniforms->lampSources[i].position = { pos.fX, pos.fY, pos.fZ, 1.0 }; + fLights.lampSources[i].position = { pos.fX, pos.fY, pos.fZ, 1.0 }; // TODO: Maximum Range - fCurrentRenderPassUniforms->lampSources[i].constAtten = omniLight->GetConstantAttenuation(); - fCurrentRenderPassUniforms->lampSources[i].linAtten = omniLight->GetLinearAttenuation(); - fCurrentRenderPassUniforms->lampSources[i].quadAtten = omniLight->GetQuadraticAttenuation(); + fLights.lampSources[i].constAtten = omniLight->GetConstantAttenuation(); + fLights.lampSources[i].linAtten = omniLight->GetLinearAttenuation(); + fLights.lampSources[i].quadAtten = omniLight->GetQuadraticAttenuation(); if (!omniLight->GetProjection() && (spotLight = plSpotLightInfo::ConvertNoRef(omniLight)) != nullptr) { hsVector3 lightDir = spotLight->GetWorldDirection(); - fCurrentRenderPassUniforms->lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; + fLights.lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; float falloff = spotLight->GetFalloff(); float theta = cosf(spotLight->GetSpotInner()); float phi = cosf(spotLight->GetProjection() ? 
hsConstants::half_pi : spotLight->GetSpotOuter()); - fCurrentRenderPassUniforms->lampSources[i].spotProps = { falloff, theta, phi }; + fLights.lampSources[i].spotProps = { falloff, theta, phi }; } else { - fCurrentRenderPassUniforms->lampSources[i].spotProps = { 0.0, 0.0, 0.0 }; + fLights.lampSources[i].spotProps = { 0.0, 0.0, 0.0 }; } } else { @@ -2553,20 +2557,20 @@ void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) void plMetalPipeline::IDisableLight(size_t i) { - fCurrentRenderPassUniforms->lampSources[i].position = { 0.0f, 0.0f, 0.0f, 0.0f }; - fCurrentRenderPassUniforms->lampSources[i].ambient = { 0.0f, 0.0f, 0.0f, 0.0f }; - fCurrentRenderPassUniforms->lampSources[i].diffuse = { 0.0f, 0.0f, 0.0f, 0.0f }; - fCurrentRenderPassUniforms->lampSources[i].specular = { 0.0f, 0.0f, 0.0f, 0.0f }; - fCurrentRenderPassUniforms->lampSources[i].constAtten = { 1.0f }; - fCurrentRenderPassUniforms->lampSources[i].linAtten = { 0.0f }; - fCurrentRenderPassUniforms->lampSources[i].quadAtten = { 0.0f }; - fCurrentRenderPassUniforms->lampSources[i].scale = { 0.0f }; + fLights.lampSources[i].position = { 0.0f, 0.0f, 0.0f, 0.0f }; + fLights.lampSources[i].ambient = { 0.0f, 0.0f, 0.0f, 0.0f }; + fLights.lampSources[i].diffuse = { 0.0f, 0.0f, 0.0f, 0.0f }; + fLights.lampSources[i].specular = { 0.0f, 0.0f, 0.0f, 0.0f }; + fLights.lampSources[i].constAtten = { 1.0f }; + fLights.lampSources[i].linAtten = { 0.0f }; + fLights.lampSources[i].quadAtten = { 0.0f }; + fLights.lampSources[i].scale = { 0.0f }; } void plMetalPipeline::IScaleLight(size_t i, float scale) { scale = int(scale * 1.e1f) * 1.e-1f; - fCurrentRenderPassUniforms->lampSources[i].scale = scale; + fLights.lampSources[i].scale = scale; } void plMetalPipeline::IDrawPlate(plPlate* plate) @@ -2660,17 +2664,17 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) //we'll just let them push/pop the current state. 
void plMetalPipeline::PushCurrentLightSources() { - plMetalShaderLightSource *lightSources = new plMetalShaderLightSource[8](); - memcpy(lightSources, fCurrentRenderPassUniforms->lampSources, sizeof(plMetalShaderLightSource[8])); + plMetalLights *lightSources = new plMetalLights(); + memcpy(lightSources, &fLights, sizeof(plMetalLights)); fLightSourceStack.emplace_back(lightSources); } void plMetalPipeline::PopCurrentLightSources() { hsAssert(fLightSourceStack.size() > 0, "Asked to pop light sources but none on stack"); - plMetalShaderLightSource *lightSources = fLightSourceStack.back(); + plMetalLights *lightSources = fLightSourceStack.back(); fLightSourceStack.pop_back(); - memcpy(fCurrentRenderPassUniforms->lampSources, lightSources, sizeof(plMetalShaderLightSource[8])); + memcpy(&fLights, lightSources, sizeof(plMetalLights)); delete lightSources; } @@ -3499,10 +3503,7 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) // to fade out a shadow as it gets too far in the distance to matter. void plMetalPipeline::ISetupShadowLight(plShadowSlave* slave) { - //FIXME: Do we need to clear the fCurrentRenderPassUniforms->lampSources array? - //Feels like we could catch lights from a previous pass - plMetalShaderLightSource lRef = fCurrentRenderPassUniforms->lampSources[0]; - memset(&lRef, 0, sizeof(lRef)); + plMetalShaderLightSource lRef = fLights.lampSources[0]; lRef.diffuse.r = lRef.diffuse.g @@ -3538,7 +3539,8 @@ void plMetalPipeline::ISetupShadowLight(plShadowSlave* slave) } //fD3DDevice->SetLight( lRef->fD3DIndex, &lRef->fD3DInfo ); - fCurrentRenderPassUniforms->lampSources[0] = lRef; + fLights.lampSources[0] = lRef; + fLights.count = 1; //Not sure hot to link lights in Metal. Do we even need to? //slave->fLightIndex = lRef->fD3DIndex; @@ -4009,7 +4011,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con // than what we're currently set for, set it again. 
//if( selfShadowNow != fShadows[i]->fSelfShadowOn ) //{ - plMetalShaderLightSource lRef = fCurrentRenderPassUniforms->lampSources[0]; + plMetalShaderLightSource lRef = fLights.lampSources[0]; // We lower the power on self shadowing, because the artists like to // crank up the shadow strength to huge values to get a darker shadow @@ -4028,7 +4030,8 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con lRef.diffuse.r = lRef.diffuse.b = lRef.diffuse.g = fShadows[i]->fPower; } lRef.scale = 1.0; - fCurrentRenderPassUniforms->lampSources[0] = lRef; + fLights.lampSources[0] = lRef; + fLights.count = 1; // record which our intensity is now set for. fShadows[i]->fSelfShadowOn = selfShadowNow; @@ -4149,6 +4152,7 @@ void plMetalPipeline::IDisableLightsForShadow() { IDisableLight(i); } + fLights.count = 0; } // ISetupShadowSlaveTextures ////////////////////////////////////////////// diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 7755f0f88d..fd0136298a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -261,7 +261,8 @@ class plMetalPipeline : public pl3DPipeline void PushCurrentLightSources(); void PopCurrentLightSources(); - std::vector fLightSourceStack; + plMetalLights fLights; + std::vector fLightSourceStack; static plMetalEnumerate enumerator; From 5ff886e3b6eee7cc81aa29ffe5e859baf07504e6 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 4 Aug 2022 23:42:29 -0700 Subject: [PATCH 097/165] Fixes for memory leaks Also fixing finding client data when app is launched from Finder --- .../pfMetalPipeline/plMetalDevice.cpp | 4 +- .../plMetalDevicePerformanceShaders.mm | 21 ++++- .../pfMetalPipeline/plMetalDeviceRefs.cpp | 2 + .../pfMetalPipeline/plMetalPipeline.cpp | 84 +------------------ .../pfMetalPipeline/plMetalPipeline.h | 1 - 5 files changed, 28 
insertions(+), 84 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 03647d1cce..1a3fe6cc41 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -147,6 +147,7 @@ void plMetalDevice::SetMaxAnsiotropy(uint8_t maxAnsiotropy) samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge); samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge); fSamplerStates[3] = fMetalDevice->newSamplerState(samplerDescriptor); + samplerDescriptor->release(); } void plMetalDevice::SetMSAASampleCount(uint8_t sampleCount) @@ -1224,6 +1225,7 @@ void plMetalDevice::CreateGammaAdjustState() { NS::Error *error; fGammaAdjustState->release(); fGammaAdjustState = fMetalDevice->newRenderPipelineState(gammaDescriptor, &error); + gammaDescriptor->release(); } void plMetalDevice::PostprocessIntoDrawable() { @@ -1304,7 +1306,7 @@ void plMetalDevice::BlitTexture(MTL::Texture* src, MTL::Texture* dst) fBlitCommandEncoder = fBlitCommandBuffer->blitCommandEncoder()->retain(); } - fBlitCommandEncoder->copyFromTexture(src, dst); + fBlitCommandEncoder->copyFromTexture(src, 0, 0, MTL::Origin(0, 0, 0), MTL::Size(src->width(), src->height(), 0), dst, 0, 0, MTL::Origin(0, 0, 0)); } #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm index 92b6b0c1a7..de63176105 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -50,6 +50,7 @@ //FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more buffers //Technically shadow blurring only happens at the end of the render pass though... 
CurrentRenderCommandEncoder()->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; //look up the shader by sigma value @@ -60,9 +61,25 @@ blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id)fMetalDevice sigma:sigma]; fBlurShaders[sigma] = (NS::Object*)blur; } - [blur encodeToCommandBuffer:(id)commandBuffer inPlaceTexture:(id*)&texture fallbackCopyAllocator:^ id (MPSKernel * kernel, id commandBuffer, id texture) { + + //we'd like to do the blur in place, but Metal might not let us. + //if it allocates a new texture, we'll have to glit that data back to the original + id destTexture = (id)texture; + bool result = [blur encodeToCommandBuffer:(id)commandBuffer inPlaceTexture:(id*)&destTexture fallbackCopyAllocator:^ id (MPSKernel * kernel, id commandBuffer, id texture) { + //this copy allocator will release the original texture - that texture is important, don't let it + [texture retain]; MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor((MTL::PixelFormat)texture.pixelFormat, texture.width, texture.height, false); descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); - return (id)fMetalDevice->newTexture(descriptor); + return (id)fMetalDevice->newTexture(descriptor)->autorelease(); }]; + + //did Metal change our original texture? 
+ if (destTexture != (id)texture) { + //we'll need to blit the dest texture back to the source + //we just committed a compute pass, buffer should be free for us to create + //a blit encoder + id blitEncoder = [(id)GetCurrentCommandBuffer() blitCommandEncoder]; + [blitEncoder copyFromTexture:destTexture sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 0) toTexture:(id)texture destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; + [blitEncoder endEncoding]; + } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp index 5c94dc2c0a..2dce9f3382 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp @@ -98,6 +98,8 @@ void plMetalDeviceRef::Link(plMetalDeviceRef **back) { plMetalVertexBufferRef::~plMetalVertexBufferRef() { + if (fData) + delete fData; Release(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 0f51df538c..05b995a2f3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -150,7 +150,7 @@ bool plRenderTriListFunc::RenderPrims() const plProfile_IncCount(DrawTriangles, fNumTris); plProfile_Inc(DrawPrimStatic); - + size_t uniformsSize = offsetof(VertexUniforms, uvTransforms) + sizeof(UVOutDescriptor) * fDevice->fPipeline->fCurrNumLayers; fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState); plMetalLights* lights = &fDevice->fPipeline->fLights; @@ -1326,7 +1326,7 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI //This is a bit weird - in since this isn't a material 
we need to build a query for the right Metal program ourselves plMetalFragmentShaderDescription description; memset(&description, 0, sizeof(description)); - description.numLayers = 1; + description.numLayers = fCurrNumLayers = 1; description.Populate(proj, 0); //DX sets the color invert when the final color should be inverted. Not sure why! @@ -1499,45 +1499,6 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) render.RenderPrims(); } - - /*HRESULT r; - - r = fD3DDevice->SetStreamSource( 0, vRef->fD3DBuffer, 0, vRef->fVertexSize ); - hsAssert( r == D3D_OK, "Error trying to set the stream source!" ); - plProfile_Inc(VertexChange); - - fD3DDevice->SetFVF(fSettings.fCurrFVFFormat = IGetBufferD3DFormat(vRef->fFormat)); - - r = fD3DDevice->SetIndices( iRef->fD3DBuffer ); - hsAssert( r == D3D_OK, "Error trying to set the indices!" ); - - plRenderTriListFunc render(fD3DDevice, iRef->fOffset, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength/3); - int j; - for( j = 0; j < material->GetNumLayers(); ) - { - int iCurrMat = j; - j = IHandleMaterial( material, iCurrMat, &span ); - if (j == -1) - break; - - ISetShaders(material->GetLayer(iCurrMat)->GetVertexShader(), material->GetLayer(iCurrMat)->GetPixelShader()); - - if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) - { - static D3DMATERIAL9 mat; - fD3DDevice->SetRenderState(D3DRS_AMBIENT, 0xffffffff); - - fD3DDevice->SetRenderState( D3DRS_DIFFUSEMATERIALSOURCE, D3DMCS_MATERIAL ); - fD3DDevice->SetRenderState( D3DRS_AMBIENTMATERIALSOURCE, D3DMCS_COLOR1 ); - fD3DDevice->SetRenderState( D3DRS_EMISSIVEMATERIALSOURCE, D3DMCS_MATERIAL ); - fD3DDevice->SetRenderState( D3DRS_SPECULARMATERIALSOURCE, D3DMCS_MATERIAL ); - - fD3DDevice->SetMaterial( &mat ); - } - - render.RenderPrims(); - }*/ - } bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) @@ -3044,6 +3005,7 @@ void 
plMetalPipeline::FindFragFunction() { ); fFragFunction = fragFunction; + functionContents->release(); library->release(); } @@ -3992,7 +3954,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con struct plMetalFragmentShaderDescription passDescription; memset(&passDescription, 0, sizeof(passDescription)); passDescription.Populate(mat->GetLayer(0), 2); - passDescription.numLayers = 3; + passDescription.numLayers = fCurrNumLayers = 3; if (mat->GetNumLayers()>1) { passDescription.Populate(mat->GetLayer(1), 2); } @@ -4576,44 +4538,6 @@ void plMetalPipeline::CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) } } -// ISetupVertexBufferRef ///////////////////////////////////////////////////////// -// Initialize input vertex buffer ref according to source. -void plMetalPipeline::ISetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalVertexBufferRef* vRef) -{ - - uint8_t format = owner->GetVertexFormat(); - - // All indexed skinning is currently done on CPU, so the source data - // will have indices, but we strip them out for the D3D buffer. - if( format & plGBufferGroup::kSkinIndices ) - { - format &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices); - format |= plGBufferGroup::kSkinNoWeights; // Should do nothing, but just in case... 
- vRef->SetSkinned(true); - vRef->SetVolatile(true); - } - - uint32_t vertSize = IGetBufferFormatSize(format); // vertex stride - uint32_t numVerts = owner->GetVertBufferCount(idx); - - vRef->fOwner = owner; - vRef->fCount = numVerts; - vRef->fVertexSize = vertSize; - vRef->fFormat = format; - vRef->fRefTime = 0; - - vRef->SetDirty(true); - vRef->SetRebuiltSinceUsed(true); - vRef->fData = nullptr; - - vRef->SetVolatile(vRef->Volatile() || owner->AreVertsVolatile()); - - vRef->fIndex = idx; - - owner->SetVertexBufferRef(idx, vRef); - hsRefCnt_SafeUnRef(vRef); -} - // CheckIndexBufferRef ///////////////////////////////////////////////////// // Make sure the buffer group has an index buffer ref and that its data is current. void plMetalPipeline::CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index fd0136298a..eaab600302 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -161,7 +161,6 @@ class plMetalPipeline : public pl3DPipeline hsGDeviceRef *MakeTextureRef(plBitmap* bitmap); void IReloadTexture( plBitmap* bitmap, plMetalTextureRef *ref ); - void ISetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalVertexBufferRef* vRef); uint32_t IGetBufferFormatSize( uint8_t format ) const; plRenderTarget* PopRenderTarget() override; From ed6d806dbce966c7f05f87ce1430c5e5df0edc49 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 7 Aug 2022 00:08:44 -0700 Subject: [PATCH 098/165] Fix for full screen being reversed --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 05b995a2f3..7214673373 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -717,7 +717,7 @@ void plMetalPipeline::RenderScreenElements() { plProfile_EndTiming(Reset); } -bool plMetalPipeline::IsFullScreen() const { return fDefaultPipeParams.Windowed; } +bool plMetalPipeline::IsFullScreen() const { return !fDefaultPipeParams.Windowed; } void plMetalPipeline::Resize(uint32_t width, uint32_t height) { From 85da7d947a842aa058eea597143b518d388d5b8a Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 7 Aug 2022 20:17:55 -0700 Subject: [PATCH 099/165] Fixing render issues when MSAA disabled --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 6 +++++- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 6 +++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 1a3fe6cc41..053fe26736 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -300,7 +300,11 @@ void plMetalDevice::BeginNewRenderPass() { if (fSampleCount == 1) { - renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); + if (NeedsPostprocessing()) { + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentUnprocessedOutputTexture); + } else { + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); + } } else { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 7214673373..8d118f9d91 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -620,9 +620,6 @@ bool plMetalPipeline::BeginRender() // If this is the primary BeginRender, make sure we're really ready. if (fInSceneDepth++ == 0) { - /// If we have a renderTarget active, use its viewport - fDevice.SetViewport(); - fDevice.BeginRender(); fVtxRefTime++; @@ -639,6 +636,9 @@ bool plMetalPipeline::BeginRender() } fDevice.CreateNewCommandBuffer(drawable); drawable->release(); + + /// If we have a renderTarget active, use its viewport + fDevice.SetViewport(); } fRenderCnt++; From e57ad876fe4543d4a8878233642bbd12ca289884 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 7 Aug 2022 19:53:25 -0700 Subject: [PATCH 100/165] Trying to reduce register pressure in vs --- .../ShaderSrc/FixedPipelineShaders.metal | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 0164de7bf2..71604e02b7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -259,20 +259,19 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], if(lightSource->scale == 0.0h) continue; - float attenuation; - float3 direction; + //w is attenation + float4 direction; if (lightSource->position.w == 0.0) { // Directional Light with no attenuation - direction = -(lightSource->direction).xyz; - attenuation = 1.0; + direction = float4(-(lightSource->direction).xyz, 1.0); } else { // Omni Light in all directions const float3 v2l = lightSource->position.xyz - float3(uniforms.localToWorldMatrix * float4(in.position, 1.0)); const float distance = length(v2l); - direction = normalize(v2l); + direction.xyz = normalize(v2l); - attenuation = 1.0 / (lightSource->constAtten + lightSource->linAtten * distance + 
lightSource->quadAtten * pow(distance, 2.0)); + direction.w = 1.0 / (lightSource->constAtten + lightSource->linAtten * distance + lightSource->quadAtten * pow(distance, 2.0)); if (lightSource->spotProps.x > 0.0) { // Spot Light with cone falloff @@ -281,13 +280,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], const float phi = lightSource->spotProps.z; const float result = pow((a - phi) / (theta - phi), lightSource->spotProps.x); - attenuation *= clamp(result, 0.0, 1.0); + direction.w *= clamp(result, 0.0, 1.0); } } - LAmbient.rgb = LAmbient.rgb + half3(attenuation * (lightSource->ambient.rgb * lightSource->scale)); - float3 dotResult = dot(Ndirection, direction); - LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (lightSource->diffuse.rgb * lightSource->scale) * half3(max(0.0, dotResult) * attenuation); + LAmbient.rgb = LAmbient.rgb + half3(direction.w * (lightSource->ambient.rgb * lightSource->scale)); + const float3 dotResult = dot(Ndirection, direction.xyz); + LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (lightSource->diffuse.rgb * lightSource->scale) * half3(max(0.0, dotResult) * direction.w); } const half3 ambient = clamp((MAmbient.rgb) * (uniforms.globalAmb.rgb + LAmbient.rgb), 0.0, 1.0); From 8cc5e338bbfa807357e8b7216e3e8f6bcac6b8c8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 7 Aug 2022 22:51:06 -0700 Subject: [PATCH 101/165] Turning off main framebuffer viewport setting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The engine doesn’t know the size, so it keeps setting it to the wrong one. The default viewport size is just fine anyway. 
--- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 8d118f9d91..d16a33e174 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -638,7 +638,7 @@ bool plMetalPipeline::BeginRender() drawable->release(); /// If we have a renderTarget active, use its viewport - fDevice.SetViewport(); + //fDevice.SetViewport(); } fRenderCnt++; From 0af96b97fceca1822756bf4654c2d84528e8de1e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 8 Aug 2022 16:53:34 -0700 Subject: [PATCH 102/165] Fixing gamma texture descriptor I thought I already fixed this? --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 053fe26736..c14d23a377 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -1028,6 +1028,7 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) { MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); mainPassDescriptor->setStorageMode(MTL::StorageModePrivate); + mainPassDescriptor->setUsage(MTL::TextureUsageShaderRead || MTL::TextureUsageRenderTarget); fCurrentUnprocessedOutputTexture->release(); fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor); } From 
d64b12c97284ed1bb56724bbadd55543e156e90e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 8 Aug 2022 21:03:46 -0700 Subject: [PATCH 103/165] Fixing MPS Blur Copy depth is 1, not 0 --- .../pfMetalPipeline/plMetalDevicePerformanceShaders.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm index de63176105..888c029799 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -79,7 +79,7 @@ //we just committed a compute pass, buffer should be free for us to create //a blit encoder id blitEncoder = [(id)GetCurrentCommandBuffer() blitCommandEncoder]; - [blitEncoder copyFromTexture:destTexture sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 0) toTexture:(id)texture destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; + [blitEncoder copyFromTexture:destTexture sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 1) toTexture:(id)texture destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; [blitEncoder endEncoding]; } } From 4befb9532f4b32da5f7005faf9134a11f2e30900 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 8 Aug 2022 21:05:28 -0700 Subject: [PATCH 104/165] Binding samplers at encoder instead of in shader This removes the function constants for the sampler type - and all the logic that goes with. Samplers are now encoded in an array. This should simplify what the fragment shader has to do - and should fix Intel Integrated Graphics which was unhappy with samplers as a non-const variable.
--- .../ShaderSrc/FixedPipelineShaders.metal | 42 +++++-------------- .../plMetalMaterialShaderRef.cpp | 15 +++++++ .../pfMetalPipeline/plMetalPipelineState.cpp | 17 -------- .../pfMetalPipeline/plMetalPipelineState.h | 7 +--- 4 files changed, 27 insertions(+), 54 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 71604e02b7..685c799ae5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -169,21 +169,11 @@ constant const uint32_t miscFlags5 [[ function_constant(FunctionConstantLayerFla constant const uint32_t miscFlags6 [[ function_constant(FunctionConstantLayerFlags + 5) ]]; constant const uint32_t miscFlags7 [[ function_constant(FunctionConstantLayerFlags + 6) ]]; constant const uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFlags + 7) ]]; - -constant const uint8_t sampleType1 [[ function_constant(FunctionConstantSampleTypes + 0) ]]; -constant const uint8_t sampleType2 [[ function_constant(FunctionConstantSampleTypes + 1) ]]; -constant const uint8_t sampleType3 [[ function_constant(FunctionConstantSampleTypes + 2) ]]; -constant const uint8_t sampleType4 [[ function_constant(FunctionConstantSampleTypes + 3) ]]; -constant const uint8_t sampleType5 [[ function_constant(FunctionConstantSampleTypes + 4) ]]; -constant const uint8_t sampleType6 [[ function_constant(FunctionConstantSampleTypes + 5) ]]; -constant const uint8_t sampleType7 [[ function_constant(FunctionConstantSampleTypes + 6) ]]; -constant const uint8_t sampleType8 [[ function_constant(FunctionConstantSampleTypes + 7) ]]; #define MAX_BLEND_PASSES 8 constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; constant const 
uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; -constant const uint8_t sampleTypes[MAX_BLEND_PASSES] = { sampleType1, sampleType2, sampleType3, sampleType4, sampleType5, sampleType6, sampleType7, sampleType8}; constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); typedef struct { @@ -204,11 +194,16 @@ typedef struct { texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasLayer7) ]]; texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; - sampler repeatSampler [[ sampler(0) ]]; - sampler clampRepeatSampler [[ sampler(1) ]]; - sampler repeatClampSampler [[ sampler(2) ]]; - sampler clampSampler [[ sampler(3) ]]; half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; + //number of layers is variable, so have to declare these samplers the ugly way + sampler samplers [[ sampler(0), function_constant(hasLayer1) ]]; + sampler sampler2 [[ sampler(1), function_constant(hasLayer2) ]]; + sampler sampler3 [[ sampler(2), function_constant(hasLayer3) ]]; + sampler sampler4 [[ sampler(3), function_constant(hasLayer4) ]]; + sampler sampler5 [[ sampler(4), function_constant(hasLayer5) ]]; + sampler sampler6 [[ sampler(5), function_constant(hasLayer6) ]]; + sampler sampler7 [[ sampler(6), function_constant(hasLayer7) ]]; + sampler sampler8 [[ sampler(7), function_constant(hasLayer8) ]]; } FragmentShaderArguments; 
typedef struct @@ -453,21 +448,6 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 verte if(passType == PassTypeColor) { return vertexColor; } else { - /* - Not using array based lookup here because the compiler - seems to have an easier time unrolling this if each lookup is done - with a constant. Using an array based lookup was hurting performance by - about 1/3rd on Apple Silicon. - */ - const uint8_t sampleType = sampleTypes[index]; - sampler colorSampler = repeatSampler; - if(sampleType == 1) { - colorSampler = clampRepeatSampler; - } else if(sampleType == 2) { - colorSampler = repeatClampSampler; - } else if(sampleType == 3) { - colorSampler = clampSampler; - } if (miscFlags[index] & kMiscPerspProjection) { sampleCoord.xy /= sampleCoord.z; @@ -475,9 +455,9 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 verte //do the actual sample if(passType == PassTypeTexture) { - return (&textures)[index].sample(colorSampler, sampleCoord.xy); + return (&textures)[index].sample((&samplers)[index], sampleCoord.xy); } else if(passType == PassTypeCubicTexture) { - return (&cubicTextures)[index].sample(colorSampler, sampleCoord.xyz); + return (&cubicTextures)[index].sample((&samplers)[index], sampleCoord.xyz); } else { return half4(0); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 3cdaf4dbdd..d45cda45b3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -294,6 +294,21 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc } } + + switch (layer->GetClampFlags()) { + case hsGMatState::kClampTextureU: + encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[1], offsetFromRootLayer); + break; + case hsGMatState::kClampTextureV: + 
encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[2], offsetFromRootLayer); + break; + case hsGMatState::kClampTexture: + encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[3], offsetFromRootLayer); + break; + default: + encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[0], offsetFromRootLayer); + break; + } } uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 5f41316836..d4ef732551 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -106,7 +106,6 @@ void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstan constants->setConstantValues(&fFragmentShaderDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); constants->setConstantValues(&fFragmentShaderDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); constants->setConstantValues(&fFragmentShaderDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); - constants->setConstantValues(&fFragmentShaderDescription.sampleTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSampleTypes, 8)); } size_t plMetalMaterialPassPipelineState::GetHash() const { @@ -328,22 +327,6 @@ void plMetalFragmentShaderDescription::Populate(plLayerInterface* layPtr, uint8_ } else { passTypes[index] = PassTypeColor; } - - - switch (layPtr->GetClampFlags()) { - case hsGMatState::kClampTextureU: - sampleTypes[index] = 1; - break; - case hsGMatState::kClampTextureV: - sampleTypes[index] = 2; - break; - case hsGMatState::kClampTexture: - sampleTypes[index] = 3; - break; - default: - sampleTypes[index] = 0; - break; - } } bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 35ba34869c..fefc1c4d96 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -115,13 +115,12 @@ struct plMetalFragmentShaderDescription { uint8_t passTypes[8]; uint32_t blendModes[8]; uint32_t miscFlags[8]; - uint8_t sampleTypes[8]; uint8_t numLayers; size_t hash; bool operator==(const plMetalFragmentShaderDescription &p) const { - bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0 && memcmp(sampleTypes, p.sampleTypes, sizeof(sampleTypes)) == 0; + bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0; return match; } @@ -149,10 +148,6 @@ struct plMetalFragmentShaderDescription { value ^= std::hash()( passTypes[i] ); } - for(int i=0;i<8;i++){ - value ^= std::hash()( sampleTypes[i] ); - } - return value; } From ad2774bb1174be4b837d8f1323b181c424152be3 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 8 Aug 2022 21:27:02 -0700 Subject: [PATCH 105/165] =?UTF-8?q?Writing=20the=20bitmask=20correctly=20t?= =?UTF-8?q?his=20time=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index c14d23a377..b145e8d43c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -1028,7 +1028,7 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) { MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); mainPassDescriptor->setStorageMode(MTL::StorageModePrivate); - mainPassDescriptor->setUsage(MTL::TextureUsageShaderRead || MTL::TextureUsageRenderTarget); + mainPassDescriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageRenderTarget); fCurrentUnprocessedOutputTexture->release(); fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor); } From f3e8184f3654d4910c3ed26d7435ad851352639e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 11 Aug 2022 17:09:24 -0700 Subject: [PATCH 106/165] Sampler states work again on projections MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Sampler state code is now shared. I’m not restoring the monolithic “encode all the layers” treatment that DirectX did. I need some flexibility to encode argument buffers at a later point, which means the material can’t just bind itself to encoder states inside of a black box function.
--- .../FeatureLib/pfMetalPipeline/plMetalDevice.cpp | 5 +++++ .../FeatureLib/pfMetalPipeline/plMetalDevice.h | 4 ++++ .../pfMetalPipeline/plMetalMaterialShaderRef.cpp | 16 ++-------------- .../pfMetalPipeline/plMetalPipeline.cpp | 2 ++ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index b145e8d43c..fb20fdf93a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -1216,6 +1216,11 @@ void plMetalDevice::SubmitCommandBuffer() fClearDrawableDepth = 1.0; } +MTL::SamplerState* plMetalDevice::SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) +{ + return fSamplerStates[sampleState]; +} + void plMetalDevice::CreateGammaAdjustState() { MTL::RenderPipelineDescriptor *gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); MTL::Library* library = fMetalDevice->newDefaultLibrary(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 776e999e33..4574810f67 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -44,6 +44,8 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "HeadSpin.h" +#include "hsGMatState.h" + #include "plMetalDeviceRef.h" #include "hsMatrix44.h" @@ -173,6 +175,8 @@ class plMetalDevice void SetMaxAnsiotropy(uint8_t maxAnsiotropy); void SetMSAASampleCount(uint8_t sampleCount); + MTL::SamplerState* SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState); + NS::UInteger CurrentTargetSampleCount() { if (fCurrentRenderTarget) { return 1; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index d45cda45b3..31fe6a401c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -295,20 +295,8 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc } - switch (layer->GetClampFlags()) { - case hsGMatState::kClampTextureU: - encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[1], offsetFromRootLayer); - break; - case hsGMatState::kClampTextureV: - encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[2], offsetFromRootLayer); - break; - case hsGMatState::kClampTexture: - encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[3], offsetFromRootLayer); - break; - default: - encoder->setFragmentSamplerState(fPipeline->fDevice.fSamplerStates[0], offsetFromRootLayer); - break; - } + MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags())); + encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer); } uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index d16a33e174..7938433155 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1340,6 +1340,8 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI fState.fCurrentPipelineState = linkedPipeline->pipelineState; fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(tex->fTexture, 0); + MTL::SamplerState* samplerState = fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(proj->GetClampFlags())); + fDevice.CurrentRenderCommandEncoder()->setFragmentSamplerState(samplerState, 0); // Okay, render it already. From 0a1751a332c665241c9bca57acf812da6f6d22fc Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 11 Aug 2022 17:10:24 -0700 Subject: [PATCH 107/165] Fixes for spot lighting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cleaning up the variable names as well. Spot lighting was broken on Intel Integrated Graphics. The spot lighting intensity wasn’t clamped before the falloff calculation. 
--- .../ShaderSrc/FixedPipelineShaders.metal | 15 +++++---------- .../pfMetalPipeline/plMetalPipeline.cpp | 4 ++-- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 685c799ae5..cdb8db534c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -270,12 +270,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], if (lightSource->spotProps.x > 0.0) { // Spot Light with cone falloff - const float a = dot(direction.xyz, normalize(-lightSource->direction).xyz); - const float theta = lightSource->spotProps.y; + const float theta = dot(direction.xyz, normalize(-lightSource->direction).xyz); + const float gamma = lightSource->spotProps.y; const float phi = lightSource->spotProps.z; - const float result = pow((a - phi) / (theta - phi), lightSource->spotProps.x); + const float epsilon = (gamma - phi); + const float intensity = clamp((theta - phi) / epsilon, 0.0, 1.0); - direction.w *= clamp(result, 0.0, 1.0); + direction.w *= pow(intensity, lightSource->spotProps.x); } } @@ -290,12 +291,6 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], abs(uniforms.invVtxAlpha - MDiffuse.a)); out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); - - float4 position = (uniforms.localToWorldMatrix * float4(in.position, 1.0)); - if(temp_hasOnlyWeight1) { - const float4 position2 = blendMatrix1 * float4(in.position, 1.0); - position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); - } const float4 vCamPosition = uniforms.worldToCameraMatrix * position; //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 7938433155..27753cf3f5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2505,10 +2505,10 @@ void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) fLights.lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; float falloff = spotLight->GetFalloff(); - float theta = cosf(spotLight->GetSpotInner()); + float gamma = cosf(spotLight->GetSpotInner()); float phi = cosf(spotLight->GetProjection() ? hsConstants::half_pi : spotLight->GetSpotOuter()); - fLights.lampSources[i].spotProps = { falloff, theta, phi }; + fLights.lampSources[i].spotProps = { falloff, gamma, phi }; } else { fLights.lampSources[i].spotProps = { 0.0, 0.0, 0.0 }; } From 73756ff35785d9ab85620e6b10584658f0b0d667 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 11 Aug 2022 17:11:19 -0700 Subject: [PATCH 108/165] Culling redundant position calculations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Position should be calculated only once now in the vertex shader. Position will also be blended with any skinning matrices before the lighting pass. That’s technically a fix - even though I haven’t seen any defects because of that yet. 
--- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index cdb8db534c..60f0082ba8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -248,6 +248,12 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], half3 LDiffuse = half3(0.0, 0.0, 0.0); const float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz; + + float4 position = (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + if(temp_hasOnlyWeight1) { + const float4 position2 = blendMatrix1 * float4(in.position, 1.0); + position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); + } for (size_t i = 0; i < lights.count; i++) { constant const plMetalShaderLightSource *lightSource = &lights.lampSources[i]; @@ -262,7 +268,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], direction = float4(-(lightSource->direction).xyz, 1.0); } else { // Omni Light in all directions - const float3 v2l = lightSource->position.xyz - float3(uniforms.localToWorldMatrix * float4(in.position, 1.0)); + const float3 v2l = lightSource->position.xyz - position.xyz; const float distance = length(v2l); direction.xyz = normalize(v2l); From f04eeaa66396c2c650ede102654b55b3f687e6c2 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 12 Aug 2022 19:13:30 -0700 Subject: [PATCH 109/165] Fixes for over all projects Kadish light spots on ground now project (seemingly) correctly. Lighting was not correctly configured. 
--- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 2 +- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 60f0082ba8..f48809e9b0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -291,7 +291,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (lightSource->diffuse.rgb * lightSource->scale) * half3(max(0.0, dotResult) * direction.w); } - const half3 ambient = clamp((MAmbient.rgb) * (uniforms.globalAmb.rgb + LAmbient.rgb), 0.0, 1.0); + const half3 ambient = (MAmbient.rgb) * clamp(uniforms.globalAmb.rgb + LAmbient.rgb, 0.0, 1.0); const half3 diffuse = clamp(LDiffuse.rgb, 0.0, 1.0); const half4 material = half4(clamp(ambient + diffuse + MEmissive.rgb, 0.0, 1.0), abs(uniforms.invVtxAlpha - MDiffuse.a)); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 27753cf3f5..ae2c41ad8b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1304,13 +1304,15 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI IScaleLight(0, true); - fCurrentRenderPassUniforms->ambientCol = half3(0.0); fCurrentRenderPassUniforms->ambientSrc = 1.0; fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; fCurrentRenderPassUniforms->specularSrc = 1.0; - fCurrentRenderPassUniforms->fogValues = {0.0, 0.0f}; - fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->globalAmb = 
{1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->emissiveCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; From 4dbc8528dd601087f9d063ac8290293e32ce2418 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 12 Aug 2022 22:55:24 -0700 Subject: [PATCH 110/165] Fixing weird shadow flickering issues Shadow light needs to be zeroed --- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 2 ++ Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index f48809e9b0..988293a23f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -277,7 +277,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], if (lightSource->spotProps.x > 0.0) { // Spot Light with cone falloff const float theta = dot(direction.xyz, normalize(-lightSource->direction).xyz); + //inner cutoff const float gamma = lightSource->spotProps.y; + //outer cutoff const float phi = lightSource->spotProps.z; const float epsilon = (gamma - phi); const float intensity = clamp((theta - phi) / epsilon, 0.0, 1.0); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index ae2c41ad8b..ee1a5cbe45 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -3469,7 +3469,7 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) // to fade out a shadow as it 
gets too far in the distance to matter. void plMetalPipeline::ISetupShadowLight(plShadowSlave* slave) { - plMetalShaderLightSource lRef = fLights.lampSources[0]; + plMetalShaderLightSource lRef = {}; lRef.diffuse.r = lRef.diffuse.g @@ -3997,7 +3997,6 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con } lRef.scale = 1.0; fLights.lampSources[0] = lRef; - fLights.count = 1; // record which our intensity is now set for. fShadows[i]->fSelfShadowOn = selfShadowNow; From e626c9dadb36ae953dff3fdd8a1f7f25877358d8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 13 Aug 2022 17:56:17 -0700 Subject: [PATCH 111/165] Fixing ghost avatars Avatar textures for other players might be loaded in after material shader creation. That means the material shader state needs to be recreated for a color -> texture transition. Setting the dirty flag when an avatar texture is set - and checking it in the material ref. --- .../pfMetalPipeline/plMetalMaterialShaderRef.cpp | 16 ++++++++++++++++ .../pfMetalPipeline/plMetalPipeline.cpp | 6 ++++++ 2 files changed, 22 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 31fe6a401c..0f16b27fd4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -90,6 +90,21 @@ void plMetalMaterialShaderRef::Release() void plMetalMaterialShaderRef::CheckMateralRef() { + if(IsDirty()) { + /* + Something (like avatars) might have modified our textures. + If we're dirty - clear all cached state. 
+ */ + fNumPasses = 0; + fPassIndices.clear(); + fPassLengths.clear(); + fFragmentShaderDescriptions.clear(); + + for(MTL::Buffer* buffer: fPassArgumentBuffers) { + buffer->release(); + } + fPassArgumentBuffers.clear(); + } if(fNumPasses == 0) { ILoopOverLayers(); @@ -102,6 +117,7 @@ void plMetalMaterialShaderRef::CheckMateralRef() fPipeline->CheckTextureRef(layer); } } + SetDirty(false); } //fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on the GPU so it's much faster. Use this encoder if there are no piggybacks or pushover layers diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index ee1a5cbe45..9af030672c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2843,6 +2843,12 @@ void plMetalPipeline::IPreprocessAvatarTextures() if (rt == nullptr) { rt = IGetNextAvRT(); + //we're about to add a texture that wasn't there before + //mark the material as dirty + plMetalMaterialShaderRef* ref = static_cast(co->fMaterial->GetDeviceRef()); + if (ref) { + ref->SetDirty(true); + } co->fTargetLayer->SetTexture(rt); } From 8e5fb97171b4d2fa95c0dbb2490f9950b4b7baa6 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 30 Aug 2022 23:01:38 -0700 Subject: [PATCH 112/165] Fixes for lighting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The lighting struct was being written wrong when projection lights are also present. There’s a bit in lighting where it scales down lights that aren’t rendered which - makes no sense. 
--- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 9af030672c..eb4d1e2038 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2407,11 +2407,13 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef { std::vector& spanLights = span->GetLightList(proj); + fLights.count = 0; for (i = 0; i < spanLights.size() && i < numLights; i++) { // If these are non-projected lights, go ahead and enable them. if( !proj ) { - IEnableLight(i, spanLights[i]); + IEnableLight(fLights.count, spanLights[i]); + fLights.count++; } onLights.emplace_back(spanLights[i]); } @@ -2421,7 +2423,8 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef /// fade them out to nothing as they get closer to the bottom. This way, they fade /// out of existence instead of pop out. - if (i < spanLights.size() - 1 && i > 0) { + //FIXME: In Metal, I'm not sure what this is doing. These lights won't be visible, and visible lights are always fully scaled. 
+ /*if (i < spanLights.size() - 1 && i > 0) { threshhold = span->GetLightStrength(i, proj); i--; overHold = threshhold * 1.5f; @@ -2436,7 +2439,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef IScaleLight(i, (1 - scale) * span->GetLightScale(i, proj)); } startScale = i + 1; - } + }*/ /// Make sure those lights that aren't scaled....aren't @@ -2459,11 +2462,6 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef } onLights.clear(); } - fLights.count = i; - - for (; i < numLights; i++) { - IDisableLight(i); - } } void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) From da5555338e2cf2e090511fbeb9291281e178cc31 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 31 Aug 2022 21:30:31 -0700 Subject: [PATCH 113/165] Upping Metal lights max to 32 Restoring the original light count for Plasma for Metal. 32 lights is a lot - but there are a lot of visible improvements. This should possibly be tiered based on hardware. 
--- .../Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h | 4 +++- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 4c827bae82..7e21960fea 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -163,9 +163,11 @@ typedef struct #endif } VertexUniforms; +#define kMetalMaxLightCount 32 + typedef struct { uint8_t count; - plMetalShaderLightSource lampSources[8]; + plMetalShaderLightSource lampSources[kMetalMaxLightCount]; } plMetalLights; #endif /* ShaderTypes_h */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index eb4d1e2038..691ee735f3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2392,7 +2392,7 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj) { - const size_t numLights = 8; + const size_t numLights = kMetalMaxLightCount; size_t i = 0; int32_t startScale; float threshhold; From 261dc21f7cf5c130023b8c3374f8ddc6d8208ece Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 25 Sep 2022 21:24:42 -0700 Subject: [PATCH 114/165] Synchronizing GPU state when correcting blend modes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This should fix Ahnonay in its current state on Trollland --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 691ee735f3..d6aa83d02a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2204,6 +2204,8 @@ void plMetalPipeline::IHandleBlendMode(hsGMatState flags) lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); } } + //layer state needs to be syncronized to the GPU + static_cast(fCurrMaterial->GetDeviceRef())->SetDirty(true); } break; } From b0c1eabc46092eb0f6b7eb25d87d388e3eb73881 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 22 Nov 2022 20:53:20 -0800 Subject: [PATCH 115/165] Filtering available texture bindings by type MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cubic bindings will no longer be present for layers that don’t have cubic textures. --- .../ShaderSrc/FixedPipelineShaders.metal | 54 ++++++++++++------- 1 file changed, 36 insertions(+), 18 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 988293a23f..5f83ee40b3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -174,25 +174,43 @@ constant const uint32_t miscFlags8 [[ function_constant(FunctionConstantLayerFla constant const uint8_t sourceTypes[MAX_BLEND_PASSES] = { sourceType1, sourceType2, sourceType3, sourceType4, sourceType5, sourceType6, sourceType7, sourceType8}; constant const uint32_t blendModes[MAX_BLEND_PASSES] = { blendModes1, blendModes2, blendModes3, blendModes4, blendModes5, blendModes6, blendModes7, blendModes8}; constant const uint32_t miscFlags[MAX_BLEND_PASSES] = { miscFlags1, miscFlags2, miscFlags3, miscFlags4, miscFlags5, miscFlags6, miscFlags7, miscFlags8}; 
-constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); - + constant const uint8_t passCount = (sourceType1 > 0) + (sourceType2 > 0) + (sourceType3 > 0) + (sourceType4 > 0) + (sourceType5 > 0) + (sourceType6 > 0) + (sourceType7 > 0) + (sourceType8 > 0); + +constant const bool has2DTexture1 = (sourceType1 == PassTypeTexture && hasLayer1); +constant const bool has2DTexture2 = (sourceType2 == PassTypeTexture && hasLayer2); +constant const bool has2DTexture3 = (sourceType3 == PassTypeTexture && hasLayer3); +constant const bool has2DTexture4 = (sourceType4 == PassTypeTexture && hasLayer4); +constant const bool has2DTexture5 = (sourceType5 == PassTypeTexture && hasLayer5); +constant const bool has2DTexture6 = (sourceType6 == PassTypeTexture && hasLayer6); +constant const bool has2DTexture7 = (sourceType7 == PassTypeTexture && hasLayer7); +constant const bool has2DTexture8 = (sourceType8 == PassTypeTexture && hasLayer8); + +constant const bool hasCubicTexture1 = (sourceType1 == PassTypeCubicTexture && hasLayer1); +constant const bool hasCubicTexture2 = (sourceType2 == PassTypeCubicTexture && hasLayer2); +constant const bool hasCubicTexture3 = (sourceType3 == PassTypeCubicTexture && hasLayer3); +constant const bool hasCubicTexture4 = (sourceType4 == PassTypeCubicTexture && hasLayer4); +constant const bool hasCubicTexture5 = (sourceType5 == PassTypeCubicTexture && hasLayer5); +constant const bool hasCubicTexture6 = (sourceType6 == PassTypeCubicTexture && hasLayer6); +constant const bool hasCubicTexture7 = (sourceType7 == PassTypeCubicTexture && hasLayer7); +constant const bool hasCubicTexture8 = (sourceType8 == PassTypeCubicTexture && hasLayer8); + typedef struct { - texture2d textures [[ texture(FragmentShaderArgumentAttributeTextures), function_constant(hasLayer1) ]]; - texture2d texture2 [[ 
texture(FragmentShaderArgumentAttributeTextures + 1), function_constant(hasLayer2) ]]; - texture2d texture3 [[ texture(FragmentShaderArgumentAttributeTextures + 2), function_constant(hasLayer3) ]]; - texture2d texture4 [[ texture(FragmentShaderArgumentAttributeTextures + 3), function_constant(hasLayer4) ]]; - texture2d texture5 [[ texture(FragmentShaderArgumentAttributeTextures + 4), function_constant(hasLayer5) ]]; - texture2d texture6 [[ texture(FragmentShaderArgumentAttributeTextures + 5), function_constant(hasLayer6) ]]; - texture2d texture7 [[ texture(FragmentShaderArgumentAttributeTextures + 6), function_constant(hasLayer7) ]]; - texture2d texture8 [[ texture(FragmentShaderArgumentAttributeTextures + 7), function_constant(hasLayer8) ]]; - texturecube cubicTextures [[ texture(FragmentShaderArgumentAttributeCubicTextures), function_constant(hasLayer1) ]]; - texturecube cubicTexture2 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1), function_constant(hasLayer2) ]]; - texturecube cubicTexture3 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 2), function_constant(hasLayer3) ]]; - texturecube cubicTexture4 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3), function_constant(hasLayer4) ]]; - texturecube cubicTexture5 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 4), function_constant(hasLayer5) ]]; - texturecube cubicTexture6 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 5), function_constant(hasLayer6) ]]; - texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasLayer7) ]]; - texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasLayer8) ]]; + texture2d textures [[ texture(FragmentShaderArgumentAttributeTextures), function_constant(has2DTexture1) ]]; + texture2d texture2 [[ texture(FragmentShaderArgumentAttributeTextures + 1), function_constant(has2DTexture2) ]]; + texture2d texture3 [[ 
texture(FragmentShaderArgumentAttributeTextures + 2), function_constant(has2DTexture3) ]]; + texture2d texture4 [[ texture(FragmentShaderArgumentAttributeTextures + 3), function_constant(has2DTexture4) ]]; + texture2d texture5 [[ texture(FragmentShaderArgumentAttributeTextures + 4), function_constant(has2DTexture5) ]]; + texture2d texture6 [[ texture(FragmentShaderArgumentAttributeTextures + 5), function_constant(has2DTexture6) ]]; + texture2d texture7 [[ texture(FragmentShaderArgumentAttributeTextures + 6), function_constant(has2DTexture7) ]]; + texture2d texture8 [[ texture(FragmentShaderArgumentAttributeTextures + 7), function_constant(has2DTexture8) ]]; + texturecube cubicTextures [[ texture(FragmentShaderArgumentAttributeCubicTextures), function_constant(hasCubicTexture1) ]]; + texturecube cubicTexture2 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1), function_constant(hasCubicTexture2) ]]; + texturecube cubicTexture3 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 2), function_constant(hasCubicTexture3) ]]; + texturecube cubicTexture4 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3), function_constant(hasCubicTexture4) ]]; + texturecube cubicTexture5 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 4), function_constant(hasCubicTexture5) ]]; + texturecube cubicTexture6 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 5), function_constant(hasCubicTexture6) ]]; + texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasCubicTexture7) ]]; + texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasCubicTexture8) ]]; const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; //number of layers is variable, so have to declare these samplers the ugly 
way From 0578e89fc0d8988ac2005c18ae26666696ba6bbe Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 23 Dec 2022 14:27:45 -0800 Subject: [PATCH 116/165] Fixing crash w aux spans and projections Need to reset the index buffer earlier in the render span function. Render all projections was a late addition, and it got added to the wrong place relative to the buffer restoration. --- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index d6aa83d02a..a652161a25 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1250,6 +1250,11 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, // j == -1 means we aborted render. if( pass >= 0 ) { + //if we had to render aux spans, we probably changed the vertex and index buffer + //reset those + fState.fCurrentVertexBuffer = vRef->GetBuffer(); + fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); + // Projections that get applied to the frame buffer (after all passes). if( fProjAll.size() && !(fView.fRenderState & kRenderNoProjection) ) { fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::MakeConstantString("Render All Projections")); @@ -1259,11 +1264,6 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, // Handle render of shadows onto geometry. 
if( fShadows.size() ) { - //if we had to render aux spans, we probably changed the vertex and index buffer - //reset those - fState.fCurrentVertexBuffer = vRef->GetBuffer(); - fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); - IRenderShadowsOntoSpan(render, &span, material, vRef); } } From 430974055aef41d36f99f16a4e12ba0a694c2a98 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 30 Dec 2022 01:14:37 -0800 Subject: [PATCH 117/165] Fixing shadows not rendering on some hardware Shadow commands were being given inconsistent function constants on compilation. The shader is looking for the source types to be properly encoded as constants, but the shadow shader was hard coded to look for the source type constant for layer 3. This caused layers 1 and 2 to be dropped and not properly bound because the shader assumed there was nothing there. --- .../pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal | 4 ++-- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 5f83ee40b3..b74f7e406f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -694,13 +694,13 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], //only possible alpha sources are layers 0 or 1 if(alphaSrc == 0 && passCount > 0) { - half4 layerColor = layers.sampleLayer(0, in.vtxColor,sourceTypes[2], in.texCoord3); + half4 layerColor = layers.sampleLayer(0, in.vtxColor,sourceTypes[0], in.texCoord3); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; } else if(alphaSrc == 1 && passCount > 1) { - half4 layerColor = layers.sampleLayer(1, in.vtxColor, sourceTypes[2], in.texCoord3); + half4 layerColor = 
layers.sampleLayer(1, in.vtxColor, sourceTypes[1], in.texCoord3); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index a652161a25..fc334e4cb3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -3963,10 +3963,10 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con struct plMetalFragmentShaderDescription passDescription; memset(&passDescription, 0, sizeof(passDescription)); - passDescription.Populate(mat->GetLayer(0), 2); + passDescription.Populate(mat->GetLayer(0), 0); passDescription.numLayers = fCurrNumLayers = 3; if (mat->GetNumLayers()>1) { - passDescription.Populate(mat->GetLayer(1), 2); + passDescription.Populate(mat->GetLayer(1), 1); } plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); From 3155a3cc6dbc2efb8e89f1d424781b8520b3c01c Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 1 Jan 2023 16:19:37 -0800 Subject: [PATCH 118/165] Fixes for WaveRip and vertex index streaming Streaming of subsets of vertex indexes was broken, fixing. Because of triple buffering of the index buffers, there might still be bugs. A partial update of a buffer should be applied on the previous version of the buffer, but we have to buffer swap. Should consider copying the previous buffer's contents into the new buffer before updating for consistency. WaveRip was also badly broken. Fixing. 
--- .../pfMetalPipeline/ShaderSrc/WaveRip.metal | 52 ++++++------------- .../pfMetalPipeline/plMetalDevice.cpp | 9 ++-- 2 files changed, 22 insertions(+), 39 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal index 441ee1cd7b..381affab6e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal @@ -68,10 +68,10 @@ typedef struct { float4 Tex1_Row0; float4 Tex1_Row1; float4 Tex1_Row2; - float4 LocalToWorld; float4 L2WRow0; float4 L2WRow1; float4 L2WRow2; + float4 L2WRow3; float4 Lengths; float4 WaterLevel; float4 DepthFalloff; @@ -83,8 +83,8 @@ typedef struct { typedef struct { float4 position [[position]]; - float4 c1; - float4 texCoord0; + half4 c1; + float2 texCoord0; float fog; } waveRipInOut; @@ -197,8 +197,7 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], // Calc our filter (see above). float4 inColor = float4(in.color) / 255.0f; float4 filter = inColor.wwww * uniforms.Lengths; - filter = max(filter, uniforms.NumericConsts.xxxx); - filter = min(filter, uniforms.NumericConsts.zzzz); + filter = clamp(filter, 0.0f, 1.0f); //mov r2, r1; // r2 == sinDist @@ -251,15 +250,6 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], out.position = worldPosition * uniforms.WorldToNDC; out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; - // Now onto texture coordinate generation. - // - // First is the usual texture transform - out.texCoord0 = float4( - dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0), - dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1), - uniforms.NumericConsts.zz - ); - // Dyna Stuff // Constants // c33 = fC1U, fC2U, fC1V, fC2V @@ -271,38 +261,30 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], // // Initialize r1.zw to 0,1 - float4 r1 = float4(0,0,0,1); // Calc r1.x = age, r1.y = atten // age = t - birth. 
- r1.x = uniforms.LifeConsts.y - in.position.z; + const float age = uniforms.LifeConsts.y - in.texCoord1.z; // atten = clamp0_1(age / ramp) * clamp0_1((life-age) / (life-decay)); // first clamp0_1(age/ramp) - r1.y = r1.x - uniforms.RampBias.y; - r1.y = min(r1.y, 1.0f); - // now clamp0_1((life-age) / (life-decay)); - r1.z = uniforms.LifeConsts.z - in.position.x; - r1.z *= uniforms.LifeConsts.w; - r1.z = clamp(r1.z, 0.0f, 1.0f); - r1.y *= r1.z; + const float atten = clamp(age * uniforms.RampBias.y, 0.0f, 1.0f) + * clamp((uniforms.LifeConsts.z - age) * uniforms.LifeConsts.w, 0.0f, 1.0f); // color is (atten, atten, atten, 1.f) // Need to calculate opacity we would have had from vs_WaveFixedFin7.inl // Right now that's just modulating by r4.y. - out.c1 = (depth * uniforms.LifeConsts.x) * r1.yyyw; + out.c1 = (depth.y * uniforms.LifeConsts.x) * half4(atten, atten, atten, 1.0h); // UVW = (inUVW - 0.5) * scale + 0.5 // where: // scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f - float4 r2 = float4(0,0,0,1); - r2.xy = r1.xx * uniforms.TexConsts.yw; - r2.xy += 1.0f; - r2.xy = (1.0f/r2.xy); - r2.xy *= uniforms.TexConsts.xz; - r1.xy = in.position.xy - 0.5f; - r1.xy *= r2.xy; - r1.xy += 0.5f; - out.texCoord0 = r1; + float2 scale = age * uniforms.TexConsts.yw; + scale += 1.0f; + scale = (1.0f/scale); + scale *= uniforms.TexConsts.xz; + out.texCoord0 = in.texCoord1.xy - 0.5f; + out.texCoord0 *= scale.xy; + out.texCoord0 += 0.5f; return out; } @@ -312,8 +294,8 @@ fragment half4 ps_WaveRip(waveRipInOut in [[stage_in]], constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, - address::repeat); + address::clamp_to_edge); half4 t0 = texture.sample(colorSampler, in.texCoord0.xy); - return t0 * half4(in.c1); + return t0 * in.c1; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index fb20fdf93a..05e67721c3 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -683,6 +683,7 @@ void plMetalDevice::CheckIndexBuffer(plMetalDevice::IndexBufferRef *iRef) void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef *iRef, plGBufferGroup *owner, uint32_t idx) { uint32_t startIdx = owner->GetIndexBufferStart(idx); + uint32_t fullSize = owner->GetIndexBufferCount(idx) * sizeof(uint16_t); uint32_t size = (owner->GetIndexBufferEnd(idx) - startIdx) * sizeof(uint16_t); if (!size) @@ -692,14 +693,14 @@ void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef *iRef, plGB iRef->PrepareForWrite(); MTL::Buffer* indexBuffer = iRef->GetBuffer(); - if(!indexBuffer || indexBuffer->length() < size) { - indexBuffer = fMetalDevice->newBuffer(size, MTL::ResourceStorageModeManaged); + if(!indexBuffer || indexBuffer->length() < fullSize) { + indexBuffer = fMetalDevice->newBuffer(fullSize, MTL::ResourceStorageModeManaged); iRef->SetBuffer(indexBuffer); indexBuffer->release(); } - memcpy(indexBuffer->contents(), owner->GetIndexBufferData(idx), size); - indexBuffer->didModifyRange(NS::Range(0, size)); + memcpy(((uint16_t*)indexBuffer->contents()) + startIdx, owner->GetIndexBufferData(idx) + startIdx, size); + indexBuffer->didModifyRange(NS::Range(startIdx, size)); iRef->SetDirty(false); } From d68fbfa9646d5a045e2e5cbfc0fcd94b07a84aff Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 5 Feb 2023 13:00:16 -0800 Subject: [PATCH 119/165] Fixing Metal logging --- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index fc334e4cb3..0315ff35a3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1184,7 +1184,7 
@@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, // Turn on this spans lights and turn off the rest. ISelectLights(&span, mRef); -#ifdef _DEBUG +#ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string(material->GetKeyName().c_str(), NS::UTF8StringEncoding)); #endif @@ -1218,11 +1218,11 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, // Take care of projections that get applied to each pass. if( fProjEach.size() && !(fView.fRenderState & kRenderNoProjection) ) { -#ifdef _DEBUG +#ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string("Render projections", NS::UTF8StringEncoding)); #endif IRenderProjectionEach(render, material, pass, span, vRef); -#ifdef _DEBUG +#ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); #endif } @@ -1271,7 +1271,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, if ( span.GetNumAuxSpans() || (pass >= 0 && fShadows.size()) ) { } -#ifdef _DEBUG +#ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); #endif } From 7433165c771c0abf9e14c9408c1657172a993a98 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 16 Feb 2023 22:42:48 -0800 Subject: [PATCH 120/165] Getting rid of stray log --- Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp | 3 --- 1 file changed, 3 deletions(-) diff --git a/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp b/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp index bf806ad867..28dfdfb6f6 100644 --- a/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp +++ b/Sources/Plasma/PubUtilLib/plGImage/plMipmap.cpp @@ -463,9 +463,6 @@ plMipmap *plMipmap::IReadRLEImage( hsStream *stream ) bool done = false; plMipmap *retVal = new plMipmap(fWidth,fHeight,plMipmap::kARGB32Config,1); - if(retVal->GetKeyName() == "RightDTMap2_dynText") { - printf("hi"); - } uint32_t *curPos = (uint32_t*)retVal->fImage; uint32_t curLoc = 0; 
From eb1f3c8d3bc2b33c2e50f4d25fbc470dc007d508 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 18 Feb 2023 22:01:43 -0800 Subject: [PATCH 121/165] Initial work on String Theory wrapper --- Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index 37c4ab4ada..278761d7a4 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -502,7 +502,7 @@ - (void)startClient - (void)updateWindowTitle { - NSString *productTitle = [NSString stringWithCString:plProduct::LongName().c_str() encoding:NSUTF8StringEncoding]; + NSString *productTitle = [NSString stringWithSTString:plProduct::LongName()]; id device = ((CAMetalLayer *) self.window.contentView.layer).device; #ifdef HS_DEBUGGING [self.window setTitle:[NSString stringWithFormat:@"%@ - %@, %@", From 7d00365f6fc0c9f68f3d96abc594e516a88c0223 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 18 Feb 2023 23:59:19 -0800 Subject: [PATCH 122/165] Using target_include_directories MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Haven’t added platform specific scoping yet --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 65b9332ca5..515560534c 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -136,8 +136,6 @@ elseif(APPLE) set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) source_group("Metal Shaders" FILES ${plClient_SHADERS}) set(plClient_SOURCES ${plClient_SOURCES} ${plClient_SHADERS}) - include_directories("../../FeatureLib/pfMetalPipeline/metal-cpp" - "../../FeatureLib/pfMetalPipeline/ShaderSrc") else() list(APPEND 
plClient_SOURCES main.cpp @@ -185,6 +183,8 @@ if(APPLE) set_source_files_properties(Mac-Cocoa/Assets.xcassets ${RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) + target_include_directories(plClient PRIVATE "../../FeatureLib/pfMetalPipeline/metal-cpp" + PRIVATE "../../FeatureLib/pfMetalPipeline/ShaderSrc") target_link_libraries(plClient PRIVATE "-framework MetalPerformanceShaders") install( TARGETS plClient From ab162199eacc4ade3d3bd3c32cb8510fc154f8b1 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 19 Feb 2023 00:04:56 -0800 Subject: [PATCH 123/165] Removing extra manual clamp --- Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp b/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp index f43f926537..539400e173 100644 --- a/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp +++ b/Sources/Plasma/FeatureLib/pfConsole/pfConsoleCommands.cpp @@ -1341,10 +1341,6 @@ PF_CONSOLE_CMD( Graphics_Renderer, Gamma2, "float g", "Set gamma value (alternat float sinT = std::sin(t * hsConstants::pi / 2.f); float remap = std::clamp(t + (sinT - t) * g, 0.f, 1.f); - if( remap < 0 ) - remap = 0; - else if( remap > 1.f ) - remap = 1.f; ramp[i] = uint16_t(remap * float(uint16_t(-1)) + 0.5f); } From c4cec5c4ae992fa4d2488e8f91e99347d46a3775 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 19 Feb 2023 00:07:02 -0800 Subject: [PATCH 124/165] Removing extra " from pfAllCreatables Co-authored-by: dgelessus --- Sources/Plasma/FeatureLib/inc/pfAllCreatables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h b/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h index 6b7079516e..7caacf6adf 100644 --- a/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h +++ b/Sources/Plasma/FeatureLib/inc/pfAllCreatables.h @@ -59,7 +59,7 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #ifdef PLASMA_PIPELINE_GL #include "pfGLPipeline/pfGLPipelineCreatable.h" -#endif" +#endif #ifdef PLASMA_PIPELINE_METAL #include "pfMetalPipeline/pfMetalPipelineCreatable.h" From 9bdea54a03a8794b702371b05337c03b213afac4 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 19 Feb 2023 11:14:37 -0800 Subject: [PATCH 125/165] Removing Apple OpenAL support Only OpenAL soft support now --- Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp | 4 ---- Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h | 4 ---- 2 files changed, 8 deletions(-) diff --git a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp index 93c9900201..612716f878 100644 --- a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp +++ b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem.cpp @@ -43,10 +43,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #ifdef USE_EFX # include -#else -# if __APPLE__ -# include -# endif #endif #ifdef EAX_SDK_AVAILABLE # include diff --git a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h index 20818f54e3..90256ff985 100644 --- a/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h +++ b/Sources/Plasma/PubUtilLib/plAudio/plAudioSystem_Private.h @@ -48,10 +48,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #ifdef USE_EFX # include -#else -#if __APPLE__ -# include -#endif #endif #ifdef EAX_SDK_AVAILABLE # include From d36cdebb229b73d52d5cc8faa9419c04eaeccf8b Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 27 Feb 2023 00:05:55 -0800 Subject: [PATCH 126/165] Fixing missing shadows bug Shadow stages 0 and 1 and the normal stages 0 and 1 conflict and were tromping over each other. Redoing this part of the pipeline to allow for alpha blend textures to be sourced from textures 0 or 1, while allowing the alpha blend texture coordinates to be derived in stage 3. 
--- .../pfMetalPipeline/plMetalPipeline.cpp | 26 +++++++++++++++++-- .../pfMetalPipeline/plMetalPipelineState.cpp | 8 ++++-- .../pfMetalPipeline/plMetalPipelineState.h | 3 ++- 3 files changed, 32 insertions(+), 5 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 0315ff35a3..85c3b33166 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -3963,11 +3963,33 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con struct plMetalFragmentShaderDescription passDescription; memset(&passDescription, 0, sizeof(passDescription)); - passDescription.Populate(mat->GetLayer(0), 0); + passDescription.numLayers = fCurrNumLayers = 3; + + /* + Things get a wee bit complicated here. + + The texture we want to alpha blend with is already bound to texture 0 or texture 1. + However - the texture co-ords we want are in position 2 in the FVF vertex buffer. (stage 3) + + Build the shader with texture descriptions set properly for textures 0 and 1, + but put the instructions on how to treat the UVW for textures 0 or 1 into + the third stage. + + The shadow cast shader will automatically look in textures 0 and 1 when doing + the third stage blend. This saves us a texture bind. + */ + + passDescription.PopulateTextureInfo(mat->GetLayer(0), 0); + passDescription.Populate(mat->GetLayer(0), 2); + if (mat->GetNumLayers()>1) { - passDescription.Populate(mat->GetLayer(1), 1); + passDescription.PopulateTextureInfo(mat->GetLayer(1), 1); + passDescription.Populate(mat->GetLayer(1), 2); } + //There's no texture for the third stage if we're reusing the textures + //for the first and second stages from the last render. 
+ passDescription.passTypes[2] = PassTypeColor; plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index d4ef732551..a9041f615f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -310,10 +310,13 @@ void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAt ConfigureBlendMode(blendMode, descriptor); } -void plMetalFragmentShaderDescription::Populate(plLayerInterface* layPtr, uint8_t index) { +void plMetalFragmentShaderDescription::Populate(const plLayerInterface* layPtr, const uint8_t index) { blendModes[index] = layPtr->GetBlendFlags(); miscFlags[index] = layPtr->GetMiscFlags(); - + PopulateTextureInfo(layPtr, index); +} + +void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index) { plBitmap* texture = layPtr->GetTexture(); if (texture != nullptr) { if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { @@ -327,6 +330,7 @@ void plMetalFragmentShaderDescription::Populate(plLayerInterface* layPtr, uint8_ } else { passTypes[index] = PassTypeColor; } + } bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index fefc1c4d96..75fc3f262c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -151,7 +151,8 @@ struct plMetalFragmentShaderDescription { return 
value; } - void Populate(plLayerInterface* layPtr, uint8_t index); + void Populate(const plLayerInterface* layPtr, const uint8_t index); + void PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index); }; template<> From fbee202f273cf86d025b59205b89c539b58c3658 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 27 Feb 2023 14:01:24 -0800 Subject: [PATCH 127/165] Support for row major hsMatrix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Swapping the shaders around to support row major hsMatrices and eliminating the swap function. This change hasn’t filtered down to the dynamic effects which get their uniforms through a giant buffer provided by the engine itself. --- .../ShaderSrc/FixedPipelineShaders.metal | 68 +++++++++---------- .../ShaderSrc/PlateShaders.metal | 6 +- .../pfMetalPipeline/plMetalDevice.cpp | 34 ++-------- .../pfMetalPipeline/plMetalDevice.h | 5 +- .../pfMetalPipeline/plMetalPipeline.cpp | 11 +-- 5 files changed, 49 insertions(+), 75 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index b74f7e406f..333d443d40 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -265,9 +265,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], half3 LAmbient = half3(0.0, 0.0, 0.0); half3 LDiffuse = half3(0.0, 0.0, 0.0); - const float3 Ndirection = normalize(uniforms.localToWorldMatrix * float4(in.normal, 0.0)).xyz; + const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; - float4 position = (uniforms.localToWorldMatrix * float4(in.position, 1.0)); + float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); if(temp_hasOnlyWeight1) { const float4 position2 = blendMatrix1 * 
float4(in.position, 1.0); position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); @@ -317,7 +317,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], abs(uniforms.invVtxAlpha - MDiffuse.a)); out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); - const float4 vCamPosition = uniforms.worldToCameraMatrix * position; + const float4 vCamPosition = position * uniforms.worldToCameraMatrix; //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); //Fog @@ -333,13 +333,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], } out.fogColor.rgb = uniforms.fogColor; - const float4 normal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.normal, 0.0)); + const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; for(size_t layer=0; layercolumns[0][0] = src.fMap[0][0]; - dst->columns[1][0] = src.fMap[0][1]; - dst->columns[2][0] = src.fMap[0][2]; - dst->columns[3][0] = src.fMap[0][3]; - - dst->columns[0][1] = src.fMap[1][0]; - dst->columns[1][1] = src.fMap[1][1]; - dst->columns[2][1] = src.fMap[1][2]; - dst->columns[3][1] = src.fMap[1][3]; - - dst->columns[0][2] = src.fMap[2][0]; - dst->columns[1][2] = src.fMap[2][1]; - dst->columns[2][2] = src.fMap[2][2]; - dst->columns[3][2] = src.fMap[2][3]; - - dst->columns[0][3] = src.fMap[3][0]; - dst->columns[1][3] = src.fMap[3][1]; - dst->columns[2][3] = src.fMap[3][2]; - dst->columns[3][3] = src.fMap[3][3]; - } else { - memcpy(dst, &src.fMap, sizeof(matrix_float4x4)); - } + memcpy(dst, &src.fMap, sizeof(matrix_float4x4)); } return dst; @@ -957,13 +933,13 @@ void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src) hsMatrix2SIMD(inv, &fMatrixC2W); } -void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder) +void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src) { hsMatrix44 inv; src.GetInverse(&inv); - 
hsMatrix2SIMD(src, &fMatrixL2W, swapOrder); - hsMatrix2SIMD(inv, &fMatrixW2L, swapOrder); + hsMatrix2SIMD(src, &fMatrixL2W); + hsMatrix2SIMD(inv, &fMatrixW2L); } void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 4574810f67..089ac3bc61 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -67,7 +67,8 @@ class plCubicEnvironmap; class plLayerInterface; class plMetalPipelineState; -matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst, bool swapOrder = true); +//NOTE: Results of this will be row major +matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst); class plMetalDevice { @@ -149,7 +150,7 @@ class plMetalDevice void SetProjectionMatrix(const hsMatrix44& src); void SetWorldToCameraMatrix(const hsMatrix44& src); - void SetLocalToWorldMatrix(const hsMatrix44& src, bool swapOrder = true); + void SetLocalToWorldMatrix(const hsMatrix44& src); void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice); uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 85c3b33166..9c8101fcab 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2554,15 +2554,10 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState; - //column major layout simd_float4x4 projMat = matrix_identity_float4x4; - //projMat.columns[2][3] = 1.0f; - //projMat.columns[3][1] = -0.5f; - 
projMat.columns[3][2] = 0.0f; - projMat.columns[1][1] = 1.0f; /// Set up the transform directly - fDevice.SetLocalToWorldMatrix(plate->GetTransform(), false); + fDevice.SetLocalToWorldMatrix(plate->GetTransform()); IPushPiggyBacks(material); @@ -4440,8 +4435,8 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix); if (weights[j]) { //Note: This bit is different than GL/DirectX. It's using acclerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine. - destPt_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)pt_buf); - destNorm_buf += weights[j] * simd_mul(simdMatrix, *(simd_float4 *)vec_buf); + destPt_buf += simd_mul(*(simd_float4 *)pt_buf, simdMatrix) * weights[j]; + destNorm_buf += simd_mul(*(simd_float4 *)vec_buf, simdMatrix) * weights[j]; } //ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf); indices >>= 8; From 7460ded8457c815c62cf686fa536d7dc0b45088d Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 25 Jun 2023 21:41:33 -0700 Subject: [PATCH 128/165] Removing most double decs from Metal shader Moving to using hsGMatStateEnums.h --- .../ShaderSrc/FixedPipelineShaders.metal | 85 ++----------------- 1 file changed, 5 insertions(+), 80 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 333d443d40..9da7fb10ac 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -52,86 +52,11 @@ using namespace metal; #include "ShaderVertex.h" #include "ShaderTypes.h" -//copying this direction from hsGMatState because I am a horrible person but we can't import the header here in since it includes a lot of class stuff. 
-//FIXME: Come up with something better. -enum hsGMatMiscFlags: uint32_t { - kMiscWireFrame = 0x1, // dev (running out of bits) - kMiscDrawMeshOutlines = 0x2, // dev, currently unimplemented - kMiscTwoSided = 0x4, // view,dev - kMiscDrawAsSplats = 0x8, // dev? bwt - kMiscAdjustPlane = 0x10, - kMiscAdjustCylinder = 0x20, - kMiscAdjustSphere = 0x40, - kMiscAdjust = kMiscAdjustPlane | kMiscAdjustCylinder| kMiscAdjustSphere, - kMiscTroubledLoner = 0x80, - kMiscBindSkip = 0x100, - kMiscBindMask = 0x200, - kMiscBindNext = 0x400, - kMiscLightMap = 0x800, - kMiscUseReflectionXform = 0x1000, // Use the calculated reflection environment - // texture transform instead of layer->GetTransform() - kMiscPerspProjection = 0x2000, - kMiscOrthoProjection = 0x4000, - kMiscProjection = kMiscPerspProjection | kMiscOrthoProjection, - - kMiscRestartPassHere = 0x8000, // Tells pipeline to start a new pass beginning with this layer - // Kinda like troubledLoner, but only cuts off lower layers, not - // higher ones (kMiscBindNext sometimes does this by implication) - - kMiscBumpLayer = 0x10000, - kMiscBumpDu = 0x20000, - kMiscBumpDv = 0x40000, - kMiscBumpDw = 0x80000, - kMiscBumpChans = kMiscBumpDu | kMiscBumpDv | kMiscBumpDw, - - kMiscNoShadowAlpha = 0x100000, - kMiscUseRefractionXform = 0x200000, // Use a refraction-like hack. - kMiscCam2Screen = 0x400000, // Expects tex coords to be XYZ in camera space. Does a cam to screen (not NDC) projection - // and swaps Z with W, so that the texture projection can produce projected 2D screen coordinates. 
- - kAllMiscFlags = 0xffffffff -}; - -enum hsGMatBlendFlags: uint32_t { - kBlendTest = 0x1, // dev - // Rest of blends are mutually exclusive - kBlendAlpha = 0x2, // dev - kBlendMult = 0x4, // dev - kBlendAdd = 0x8, // dev - kBlendAddColorTimesAlpha = 0x10, // dev - kBlendAntiAlias = 0x20, - kBlendDetail = 0x40, - kBlendNoColor = 0x80, // dev - kBlendMADD = 0x100, - kBlendDot3 = 0x200, - kBlendAddSigned = 0x400, - kBlendAddSigned2X = 0x800, - kBlendMask = kBlendAlpha - | kBlendMult - | kBlendAdd - | kBlendAddColorTimesAlpha - | kBlendDetail - | kBlendMADD - | kBlendDot3 - | kBlendAddSigned - | kBlendAddSigned2X, - kBlendInvertAlpha = 0x1000, // dev - kBlendInvertColor = 0x2000, // dev - kBlendAlphaMult = 0x4000, - kBlendAlphaAdd = 0x8000, - kBlendNoVtxAlpha = 0x10000, - kBlendNoTexColor = 0x20000, - kBlendNoTexAlpha = 0x40000, - kBlendInvertVtxAlpha = 0x80000, // Invert ONLY the vertex alpha source - kBlendAlphaAlways = 0x100000, // Alpha test always passes (even for alpha=0). - kBlendInvertFinalColor = 0x200000, - kBlendInvertFinalAlpha = 0x400000, - kBlendEnvBumpNext = 0x800000, - kBlendSubtract = 0x1000000, - kBlendRevSubtract = 0x2000000, - kBlendAlphaTestHigh = 0x4000000, - kBlendAlphaPremultiplied = 0x8000000 -}; +#define GMAT_STATE_ENUM_START(name) enum name { +#define GMAT_STATE_ENUM_VALUE(name, val) name = val, +#define GMAT_STATE_ENUM_END(name) }; + +#include "hsGMatStateEnums.h" enum plUVWSrcModifiers: uint32_t { kUVWPassThru = 0x00000000, From 0c6387519da9a30c2ac16e36f73369bd4027f37e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 20 Aug 2023 11:21:37 -0700 Subject: [PATCH 129/165] Preventing redundant binds of samplers by tracking them locally --- .../pfMetalPipeline/plMetalMaterialShaderRef.cpp | 9 +++++++-- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 5 +++++ .../FeatureLib/pfMetalPipeline/plMetalPipeline.h | 10 ++++++++++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 0f16b27fd4..c4c85ad2d4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -311,8 +311,13 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc } - MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags())); - encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer); + if (fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag != layer->GetClampFlags()) + { + MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags())); + encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer); + + fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag = hsGMatState::hsGMatClampFlags(layer->GetClampFlags()); + } } uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 9c8101fcab..c2802ad6a5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -4619,4 +4619,9 @@ void plMetalPipeline::plMetalPipelineCurrentState::Reset() fCurrentDepthStencilState = nullptr; fCurrentVertexBuffer = nullptr; fCurrentCullMode.reset(); + + for(auto& layer: layerStates) + { + layer.clampFlag = hsGMatState::hsGMatClampFlags(-1); + } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index eaab600302..ca61a25151 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -269,7 +269,17 @@ class plMetalPipeline : public pl3DPipeline NS::AutoreleasePool* fCurrentPool; + /// Describes the state for the "fixed function" shader. struct plMetalPipelineCurrentState { + + // notes state of a given layer for a draw pass + // index is the offset from the curent root layer + // for the draw pass, not the overall index in the + // material + struct plMetalPipelineLayerState { + hsGMatState::hsGMatClampFlags clampFlag; + } layerStates[8]; + std::optional fCurrentCullMode; const MTL::RenderPipelineState* fCurrentPipelineState; MTL::Buffer* fCurrentVertexBuffer; From 1c259793e36ae57e8e43fc210377b5dd680d532e Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 20 Aug 2023 11:27:16 -0700 Subject: [PATCH 130/165] Adding light research from DX source --- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index c2802ad6a5..bc39c61150 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2426,6 +2426,10 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef /// out of existence instead of pop out. //FIXME: In Metal, I'm not sure what this is doing. These lights won't be visible, and visible lights are always fully scaled. + // Note from the DX version of the source: + // Find the strongest numLights lights to illuminate the span with. + // Weaker lights are faded out in effect so they won't pop when the + // strongest N changes membership. 
/*if (i < spanLights.size() - 1 && i > 0) { threshhold = span->GetLightStrength(i, proj); i--; From d9719822625d6efa4c533937479709a7e6bfbd20 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 20 Aug 2023 14:56:39 -0700 Subject: [PATCH 131/165] Cleaning up sampler bug from optimizations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Old sampler bindings were still in place hiding that per layer sampler binding was taking place during shader pre-compile. That caused state confusion because render encoders aren’t available during pre-compile. Removing old sampler bindings and making sure sampler binding only happens during draw - not precompiles. --- .../ShaderSrc/FixedPipelineShaders.metal | 2 +- .../plMetalMaterialShaderRef.cpp | 19 +++++++++++-------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 9da7fb10ac..4483516b40 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -139,7 +139,7 @@ typedef struct { const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; //number of layers is variable, so have to declare these samplers the ugly way - sampler samplers [[ sampler(0), function_constant(hasLayer1) ]]; + sampler samplers [[ sampler(0), function_constant(hasLayer1) ]]; sampler sampler2 [[ sampler(1), function_constant(hasLayer2) ]]; sampler sampler3 [[ sampler(2), function_constant(hasLayer3) ]]; sampler sampler4 [[ sampler(3), function_constant(hasLayer4) ]]; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index c4c85ad2d4..b5b1425d3b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -291,6 +291,10 @@ const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerIn void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer) { + // Reminder: Encoder is allowed to be null when Plasma is precompiling pipeline states + // Metal needs to know if a shader is 2D or Cubic to compile shaders + // A null encoder signifies we should build the texture but not bind state + fPipeline->CheckTextureRef(layer); plBitmap* texture = layer->GetTexture(); @@ -309,14 +313,13 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer); } - } - - if (fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag != layer->GetClampFlags()) - { - MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags())); - encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer); - - fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag = hsGMatState::hsGMatClampFlags(layer->GetClampFlags()); + if (fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag != layer->GetClampFlags()) + { + MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags())); + encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer); + + fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag = hsGMatState::hsGMatClampFlags(layer->GetClampFlags()); + } } } From a68ea2e3c4dc97ed2aded39947043442b2d941e1 Mon Sep 17 00:00:00 2001 From: Colin 
Cornaby Date: Wed, 6 Sep 2023 23:37:17 -0700 Subject: [PATCH 132/165] Moving shadow casting into lightless custom shader MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Shadows no longer need to alter light state - which is prep for the light refactor. Adds a programmable shadow pipeline that has its own state independent of the light system. Rendering of shadows is now done directly instead of through light manipulation and the “fixed” function vertex shader. --- .../ShaderSrc/FixedPipelineShaders.metal | 87 ++++++++++++++++--- .../ShaderSrc/PlateShaders.metal | 1 - .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 53 +++++++---- .../pfMetalPipeline/plMetalPipeline.cpp | 70 +++++---------- .../pfMetalPipeline/plMetalPipeline.h | 2 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 7 ++ .../pfMetalPipeline/plMetalPipelineState.h | 1 + 7 files changed, 140 insertions(+), 81 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 4483516b40..b0238399a6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -58,7 +58,7 @@ using namespace metal; #include "hsGMatStateEnums.h" -enum plUVWSrcModifiers: uint32_t { +enum plUVWSrcModifiers: uint32_t{ kUVWPassThru = 0x00000000, kUVWIdxMask = 0x0000ffff, kUVWNormal = 0x00010000, @@ -246,17 +246,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], //out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); //Fog - out.fogColor.a = 1.0; - if (uniforms.fogExponential > 0) { - out.fogColor.a = exp(-pow(uniforms.fogValues.y * length(vCamPosition), uniforms.fogValues.x)); - } else { - if (uniforms.fogValues.y > 0.0) { - const float start = uniforms.fogValues.x; - const float
end = uniforms.fogValues.y; - out.fogColor.a = (end - length(vCamPosition.xyz)) / (end - start); - } - } - out.fogColor.rgb = uniforms.fogColor; + out.fogColor = uniforms.calcFog(vCamPosition); const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; @@ -388,6 +378,23 @@ float3 VertexUniforms::sampleLocation(size_t index, thread float3 *texCoords, co } return sampleCoord.xyz; } + +half4 VertexUniforms::calcFog(float4 camPosition) constant { + half4 resultColor; + if (fogExponential > 0) { + resultColor.a = exp(-pow(fogValues.y * length(camPosition), fogValues.x)); + } else { + if (fogValues.y > 0.0) { + const float start = fogValues.x; + const float end = fogValues.y; + resultColor.a = (end - length(camPosition.xyz)) / (end - start); + } else { + resultColor.a = 1.0h; + } + } + resultColor.rgb = fogColor; + return resultColor; +} half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const { @@ -592,6 +599,62 @@ fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]]) return half4(1.0h, 1.0h, 1.0h, currentAlpha); } + +//MARK: Shadow Casting shaders + +/* + In the Direct3D pipeline, lights were created and manipulated to draw shadows in the fixed function pipelines. + + This re-implements shadows in a programmable pipeline without altering the light state. This change should + allow lights to be managed more efficiently, since the same light no longer needs to be changed multiple + times mid render. The Direct3D pipeline would alter lights mid render to control shadow strength onto a mesh. + Instead, this shader takes a shadow state struct that describes the shadow source and has strength as a discrete + property. There is no need to push an entirely new light table.
+ */ + +vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + constant plShadowState & shadowState [[ buffer(VertexShaderArgumentIndexShadowState) ]]) +{ + ColorInOut out; + + float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); + const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; + // Shadow casting uses the diffuse material color to control opacity + const half4 MDiffuse = uniforms.diffuseCol; + + //w is attenation + float4 direction; + + if (shadowState.directional == true) { + // Directional Light with no attenuation + direction = float4(-(shadowState.lightDirection).xyz, 1.0); + } else { + // Omni Light in all directions + const float3 v2l = shadowState.lightPosition.xyz - position.xyz; + direction.xyz = normalize(v2l); + direction.w = 1.0; + } + + const float3 dotResult = dot(Ndirection, direction.xyz); + const half3 diffuse = MDiffuse.rgb * half3(max(0.0, dotResult)) * shadowState.power; + out.vtxColor = half4(diffuse, 1.f); + + const float4 vCamPosition = position * uniforms.worldToCameraMatrix; + + //Fog + out.fogColor = uniforms.calcFog(vCamPosition); + + const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; + + for(size_t layer=0; layer texture [[ texture(16) ]], diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal index 5a8a36d925..1ac6c38a0a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal @@ -89,7 +89,6 @@ vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]], fragment float4 fragmentShader(ColorInOut in [[stage_in]], constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - constant plMetalFragmentShaderArgumentBuffer 
& fragmentShaderArgs [[ buffer(BufferIndexFragArgBuffer) ]], constant float & alpha [[ buffer(6) ]], texture2d colorMap [[ texture(Texture) ]]) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 7e21960fea..ac9e54b332 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -52,34 +52,44 @@ typedef __attribute__((__ext_vector_type__(3))) half half3; typedef __attribute__((__ext_vector_type__(4))) half half4; #endif -enum plMetalShaderArgumentIndex +enum plMetalVertexShaderArgumentIndex { - //Texture is a legacy argument for the simpler plate shader - Texture = 1, + /// Material State BufferIndexState = 2, + /// Uniform table for Plasma dynamic shaders BufferIndexUniforms = 3, + /// Light Table BufferIndexLights = 4, - BufferIndexFragArgBuffer = 5, + /// Blend matrix for GPU side animation blending + BufferIndexBlendMatrix1 = 6, + /// Describes the state of a shadow caster for shadow cast shader + VertexShaderArgumentIndexShadowState = 9 +}; + +enum plMetalFragmentShaderArgumentIndex +{ + /// Texture is a legacy argument for the simpler plate shader + Texture = 1, + /// Fragment uniforms BufferIndexShadowCastFragArgBuffer = 4, - BufferIndexBlendMatrix1 = 6 + /// Legacy argument buffer + BufferIndexFragArgBuffer = 5, + /// Layer index of alpha for shadow fragment shader + FragmentShaderArgumentShadowAlphaSrc = 8 }; -enum plMetalVertexShaderUniform +enum plMetalVertexAttribute { + /// position of a vertex VertexAttributePosition = 0, + /// UV of a vertex. Reserves IDs 1-8. 
VertexAttributeTexcoord = 1, + /// Normal attribute of a vertex VertexAttributeNormal = 9, - VertexAttributeUVCount = 10, - VertexAttributeColor = 11, - VertexAttributeWeights = 12, -}; - -enum plMetalFragmentShaderUniform -{ - FragmentShaderArgumentShadowAlphaSrc = 8, - FragmentShaderArgumentPiggybackLayers = 9, - FragmentShaderArgumentNumPiggybackLayers = 10, - FragmentShaderOverrideLayer = 11 + /// Color attribute of a vertex + VertexAttributeColor = 10, + /// Animation weight of a vertex + VertexAttributeWeights = 11, }; enum plMetalFunctionConstant @@ -160,6 +170,7 @@ typedef struct UVOutDescriptor uvTransforms[8]; #ifdef __METAL_VERSION__ float3 sampleLocation(size_t index, thread float3 *texCoords, const float4 normal, const float4 camPosition) constant; + half4 calcFog(float4 camPosition) constant; #endif } VertexUniforms; @@ -170,5 +181,13 @@ typedef struct { plMetalShaderLightSource lampSources[kMetalMaxLightCount]; } plMetalLights; +typedef struct { + simd::float3 lightPosition; + simd::float3 lightDirection; + bool directional; + float power; + half opacity; +} plShadowState; + #endif /* ShaderTypes_h */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index bc39c61150..d982527df6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -3462,7 +3462,7 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) return true; } -// ISetupShadowLight ////////////////////////////////////////////////////////////////// +// ISetupShadowState ////////////////////////////////////////////////////////////////// // We use the shadow light to modulate the shadow effect in two ways while // projecting the shadow map onto the scene. 
// First, the intensity of the shadow follows the N dot L of the light on @@ -3472,49 +3472,30 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) // Second, we attenuate the whole shadow effect through the lights diffuse color. // We attenuate for different reasons, like the intensity of the light, or // to fade out a shadow as it gets too far in the distance to matter. -void plMetalPipeline::ISetupShadowLight(plShadowSlave* slave) +void plMetalPipeline::ISetupShadowState(plShadowSlave* slave, plShadowState& shadowState) { - plMetalShaderLightSource lRef = {}; - - lRef.diffuse.r - = lRef.diffuse.g - = lRef.diffuse.b - = slave->fPower; + shadowState.power = slave->fPower; slave->fSelfShadowOn = false; if( slave->Positional() ) { hsPoint3 position = slave->fLightPos; - lRef.position.x = position.fX; - lRef.position.y = position.fY; - lRef.position.z = position.fZ; - - //const float maxRange = 32767.f; - //lRef->fD3DInfo.Range = maxRange; - lRef.constAtten = 1.f; - lRef.linAtten = 0; - lRef.quadAtten = 0; + shadowState.lightPosition.x = position.fX; + shadowState.lightPosition.y = position.fY; + shadowState.lightPosition.z = position.fZ; - //lRef->fD3DInfo.Type = D3DLIGHT_POINT; - lRef.position.w = 1.0; + shadowState.directional = false; } else { hsVector3 dir = slave->fLightDir; - lRef.direction.x = dir.fX; - lRef.direction.y = dir.fY; - lRef.direction.z = dir.fZ; + shadowState.lightDirection.x = dir.fX; + shadowState.lightDirection.y = dir.fY; + shadowState.lightDirection.z = dir.fZ; - lRef.position.w = 0.0; + shadowState.directional = true; } - - //fD3DDevice->SetLight( lRef->fD3DIndex, &lRef->fD3DInfo ); - fLights.lampSources[0] = lRef; - fLights.count = 1; - - //Not sure hot to link lights in Metal. Do we even need to? - //slave->fLightIndex = lRef->fD3DIndex; } @@ -3958,7 +3939,8 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con // See ISetupShadowLight below for how the shadow light is used. 
// The shadow light isn't used in generating the shadow map, it's used // in projecting the shadow map onto the scene. - ISetupShadowLight(fShadows[i]); + plShadowState shadowState; + ISetupShadowState(fShadows[i], shadowState); struct plMetalFragmentShaderDescription passDescription; memset(&passDescription, 0, sizeof(passDescription)); @@ -4002,9 +3984,8 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con // so we cache whether the shadow light is set for regular or // self shadowing intensity. If what we're doing now is different // than what we're currently set for, set it again. - //if( selfShadowNow != fShadows[i]->fSelfShadowOn ) - //{ - plMetalShaderLightSource lRef = fLights.lampSources[0]; + if( selfShadowNow != fShadows[i]->fSelfShadowOn ) + { // We lower the power on self shadowing, because the artists like to // crank up the shadow strength to huge values to get a darker shadow @@ -4016,29 +3997,23 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con { plConst(float) kMaxSelfPower = 0.3f; float power = (float) fShadows[i]->fPower > kMaxSelfPower ? (float) kMaxSelfPower : ((float) fShadows[i]->fPower); - lRef.diffuse.r = lRef.diffuse.b = lRef.diffuse.g = power; + shadowState.power = power; } else { - lRef.diffuse.r = lRef.diffuse.b = lRef.diffuse.g = fShadows[i]->fPower; + shadowState.power = fShadows[i]->fPower; } - lRef.scale = 1.0; - fLights.lampSources[0] = lRef; // record which our intensity is now set for. fShadows[i]->fSelfShadowOn = selfShadowNow; - //} + } - // Enable the light. - //fD3DDevice->LightEnable(fShadows[i]->fLightIndex, true);*/ + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&shadowState, sizeof(shadowState), VertexShaderArgumentIndexShadowState); #ifndef PLASMA_EXTERNAL_RELEASE if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply)) #endif // PLASMA_EXTERNAL_RELEASE render.RenderPrims(); - - // Disable it again. 
- //fD3DDevice->LightEnable(fShadows[i]->fLightIndex, false); } } @@ -4112,9 +4087,6 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) // Set the D3D lighting/material model for projecting the shadow map onto this material. void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) { - IDisableLightsForShadow(); - //inlEnsureLightingOn(); - fCurrLightingMethod = plSpan::kLiteShadow; if( mat && mat->GetNumLayers() && mat->GetLayer(0) ) @@ -4129,9 +4101,6 @@ void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) fCurrentRenderPassUniforms->specularSrc = 0.0; fCurrentRenderPassUniforms->ambientSrc = 0.0; fCurrentRenderPassUniforms->globalAmb = 0.0; - - //fD3DDevice->SetMaterial(&d3dMat); - //fD3DDevice->SetRenderState( D3DRS_AMBIENT, 0 );*/ } // IDisableLightsForShadow /////////////////////////////////////////////////////////// @@ -4139,6 +4108,7 @@ void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) // the surface. void plMetalPipeline::IDisableLightsForShadow() { + //FIXME: Planned for removal - but used by projections. New light code will obsolete. 
int i; for( i = 0; i < 8; i++ ) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index ca61a25151..8a060a6f68 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -229,13 +229,13 @@ class plMetalPipeline : public pl3DPipeline void IResetRenderTargetPools(); void IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span); plMetalTextureRef* fULutTextureRef; - void ISetupShadowLight(plShadowSlave* slave); void IMakeRenderTargetPools(); hsGDeviceRef* SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner); void IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef *vRef); void ISetupShadowRcvTextureStages(hsGMaterial* mat); void ISetupShadowSlaveTextures(plShadowSlave* slave); void ISetShadowLightState(hsGMaterial* mat); + void ISetupShadowState(plShadowSlave* slave, plShadowState& shadowState); void IDisableLightsForShadow(); void IReleaseRenderTargetPools(); void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index a9041f615f..48a6550bf5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -337,6 +337,13 @@ bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) co return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fFragmentShaderDescription == this->fFragmentShaderDescription; } +MTL::Function* plMetalRenderShadowPipelineState::GetVertexFunction(MTL::Library* library) { + return library->newFunction( 
+ NS::String::string("shadowCastVertexShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error **)NULL + )->autorelease(); +} MTL::Function* plMetalRenderShadowPipelineState::GetFragmentFunction(MTL::Library* library) { return library->newFunction( diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 75fc3f262c..8b3acd6dd8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -223,6 +223,7 @@ class plMetalRenderShadowPipelineState: public plMetalMaterialPassPipelineState const NS::String* GetDescription() override { return NS::MakeConstantString("Shadow Span Render Pipeline"); }; + MTL::Function* GetVertexFunction(MTL::Library* library) override; MTL::Function* GetFragmentFunction(MTL::Library* library) override; void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; virtual uint16_t GetID() const override { return 3; } ; From ac0884fd20a4684c6fd7992187c334c2126edee0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 9 Sep 2023 21:37:19 -0700 Subject: [PATCH 133/165] Fixing bug in projections. Projections altered the sampler state for layer 0 without updating state tracking. (Perhaps sampler state changes should be wrapped in a function.) 
--- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index d982527df6..008cec07e2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1344,6 +1344,7 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(tex->fTexture, 0); MTL::SamplerState* samplerState = fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(proj->GetClampFlags())); fDevice.CurrentRenderCommandEncoder()->setFragmentSamplerState(samplerState, 0); + fState.layerStates[0].clampFlag = hsGMatState::hsGMatClampFlags(proj->GetClampFlags()); // Okay, render it already. From 33e58a9a576ed866f9168f01f2c6d8770b3f53fa Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 10 Sep 2023 21:34:16 -0700 Subject: [PATCH 134/165] Adding graphics pipelines to pfFeatureInc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Should allow tests to build on Mac again. pfFeatureInc included the Metal pipeline header, but didn’t declare it as a dependency. 
--- Sources/Plasma/FeatureLib/inc/CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Sources/Plasma/FeatureLib/inc/CMakeLists.txt b/Sources/Plasma/FeatureLib/inc/CMakeLists.txt index 1469ade8b1..777c9a3e32 100644 --- a/Sources/Plasma/FeatureLib/inc/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/inc/CMakeLists.txt @@ -14,6 +14,9 @@ target_link_libraries(pfFeatureInc pfGameMgr pfJournalBook pfMessage + $<$:pfDXPipeline> + $<$:pfGLPipeline> + $<$:pfMetalPipeline> pfPython pfSurface ) From b8cf1630b63ff102792c9430dffccb9252ed4f0a Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Mon, 11 Sep 2023 19:45:49 -0700 Subject: [PATCH 135/165] Starting cleanup of shader binding names --- .../ShaderSrc/BiasNormals.metal | 2 +- .../ShaderSrc/CompCosines.metal | 4 +- .../ShaderSrc/FixedPipelineShaders.metal | 18 +++---- .../pfMetalPipeline/ShaderSrc/Grass.metal | 2 +- .../ShaderSrc/PlateShaders.metal | 6 +-- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 51 +++++++++++-------- .../ShaderSrc/WaveDec1Lay_7.metal | 2 +- .../ShaderSrc/WaveDecEnv.metal | 2 +- .../pfMetalPipeline/ShaderSrc/WaveRip.metal | 2 +- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 2 +- .../pfMetalPipeline/plMetalFragmentShader.cpp | 2 +- .../plMetalMaterialShaderRef.cpp | 6 +-- .../pfMetalPipeline/plMetalPipeline.cpp | 14 ++--- .../pfMetalPipeline/plMetalVertexShader.cpp | 2 +- 14 files changed, 61 insertions(+), 54 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal index 1961896856..b31778c128 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal @@ -67,7 +67,7 @@ typedef struct { } vs_BiasNormalsOut; vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[stage_in]], - constant vs_BiasNormalsUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant 
vs_BiasNormalsUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_BiasNormalsOut out; out.position = float4(in.position, 1.0); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal index 33d220e491..a61df286ad 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal @@ -62,7 +62,7 @@ typedef struct { } vs_CompCosinesnInOut; vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[stage_in]], - constant vs_CompCosinesUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant vs_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_CompCosinesnInOut out; out.position = float4(in.position, 1.0); @@ -94,7 +94,7 @@ fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[stage_in]], texture2d t1 [[ texture(1) ]], texture2d t2 [[ texture(2) ]], texture2d t3 [[ texture(3) ]], - constant ps_CompCosinesUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant ps_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { // Composite the cosines together. // Input map is cosine(pix) for each of // the 4 waves. 
diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index b0238399a6..c69f772a4d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -136,7 +136,7 @@ typedef struct { texturecube cubicTexture6 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 5), function_constant(hasCubicTexture6) ]]; texturecube cubicTexture7 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 6), function_constant(hasCubicTexture7) ]]; texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasCubicTexture8) ]]; - const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(BufferIndexFragArgBuffer) ]]; + const constant plMetalFragmentShaderArgumentBuffer* bufferedUniforms [[ buffer(FragmentShaderArgumentUniforms) ]]; half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; //number of layers is variable, so have to declare these samplers the ugly way sampler samplers [[ sampler(0), function_constant(hasLayer1) ]]; @@ -174,9 +174,9 @@ typedef struct } ShadowCasterInOut; vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - constant plMetalLights & lights [[ buffer(BufferIndexLights) ]], - constant float4x4 & blendMatrix1 [[ buffer(BufferIndexBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) + constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]], + constant plMetalLights & lights [[ buffer(VertexShaderArgumentLights) ]], + constant float4x4 & blendMatrix1 [[ buffer(VertexShaderArgumentBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) { ColorInOut out; //we should have been able to 
swizzle, but it didn't work in Xcode beta? Try again later. @@ -577,7 +577,7 @@ constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t b } vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]]) + constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]]) { ShadowCasterInOut out; @@ -613,8 +613,8 @@ fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]]) */ vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], - constant plShadowState & shadowState [[ buffer(VertexShaderArgumentIndexShadowState) ]]) + constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]], + constant plShadowState & shadowState [[ buffer(VertexShaderArgumentShadowState) ]]) { ColorInOut out; @@ -658,9 +658,9 @@ vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], texture2d texture [[ texture(16) ]], - constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(BufferIndexShadowCastFragArgBuffer) ]], + constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(FragmentShaderArgumentShadowCastUniforms) ]], FragmentShaderArguments layers, - constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowAlphaSrc) ]]) + constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowCastAlphaSrc) ]]) { float3 sampleCoords = in.texCoord1; if(fragmentUniforms.pointLightCast) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal index bc10488b2a..032b186493 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal @@ -33,7 +33,7 @@ typedef 
struct { } vs_GrassInOut; vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], - constant vs_GrassUniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant vs_GrassUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_GrassInOut out; float4 r0 = (in.position.x * uniforms.waveDirX) + (in.position.y * uniforms.waveDirX); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal index 1ac6c38a0a..810ddb7355 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal @@ -71,7 +71,7 @@ typedef struct } ColorInOut; vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + constant VertexUniforms & uniforms [[ buffer(VertexShaderArgumentFixedFunctionUniforms) ]], uint v_id [[vertex_id]]) { ColorInOut out; @@ -88,9 +88,9 @@ vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]], } fragment float4 fragmentShader(ColorInOut in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(BufferIndexState) ]], + constant VertexUniforms & uniforms [[ buffer(VertexShaderArgumentFixedFunctionUniforms) ]], constant float & alpha [[ buffer(6) ]], - texture2d colorMap [[ texture(Texture) ]]) + texture2d colorMap [[ texture( FragmentShaderArgumentTexture) ]]) { constexpr sampler colorSampler(mip_filter::linear, mag_filter::linear, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index ac9e54b332..0269390866 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -52,55 +52,62 @@ typedef __attribute__((__ext_vector_type__(3))) half half3; typedef 
__attribute__((__ext_vector_type__(4))) half half4; #endif -enum plMetalVertexShaderArgumentIndex +enum plMetalVertexShaderArgument { /// Material State - BufferIndexState = 2, + VertexShaderArgumentFixedFunctionUniforms = 2, /// Uniform table for Plasma dynamic shaders - BufferIndexUniforms = 3, + VertexShaderArgumentMaterialShaderUniforms = 3, /// Light Table - BufferIndexLights = 4, + VertexShaderArgumentLights = 4, /// Blend matrix for GPU side animation blending - BufferIndexBlendMatrix1 = 6, + VertexShaderArgumentBlendMatrix1 = 6, /// Describes the state of a shadow caster for shadow cast shader - VertexShaderArgumentIndexShadowState = 9 + VertexShaderArgumentShadowState = 9 }; enum plMetalFragmentShaderArgumentIndex { /// Texture is a legacy argument for the simpler plate shader - Texture = 1, + FragmentShaderArgumentTexture = 1, /// Fragment uniforms - BufferIndexShadowCastFragArgBuffer = 4, + FragmentShaderArgumentShadowCastUniforms = 4, /// Legacy argument buffer - BufferIndexFragArgBuffer = 5, + FragmentShaderArgumentUniforms = 5, /// Layer index of alpha for shadow fragment shader - FragmentShaderArgumentShadowAlphaSrc = 8 + FragmentShaderArgumentShadowCastAlphaSrc = 8 }; enum plMetalVertexAttribute { /// position of a vertex - VertexAttributePosition = 0, + VertexAttributePosition = 0, /// UV of a vertex. Reserves IDs 1-8. 
- VertexAttributeTexcoord = 1, + VertexAttributeTexcoord = 1, /// Normal attribute of a vertex - VertexAttributeNormal = 9, + VertexAttributeNormal = 9, /// Color attribute of a vertex - VertexAttributeColor = 10, + VertexAttributeColor = 10, /// Animation weight of a vertex - VertexAttributeWeights = 11, + VertexAttributeWeights = 11, }; +/// Arguments to the shader compiler to control output enum plMetalFunctionConstant { - FunctionConstantNumUVs = 0, - FunctionConstantNumLayers = 1, - FunctionConstantSources = 2, - FunctionConstantBlendModes = 10, - FunctionConstantLayerFlags = 18, - FunctionConstantNumWeights = 26, - FunctionConstantSampleTypes = 34, + /// Number of UVs in the FVF vertex layout. + FunctionConstantNumUVs = 0, + /// Number of layers the shader will need to render + FunctionConstantNumLayers = 1, + /// Source type of the material texture. Metal needs to know if the texture will + /// be cubic or 2D in advance. Eight values reserved. + FunctionConstantSources = 2, + /// Blend modes for each of the layers. + FunctionConstantBlendModes = 10, + /// Render flags for each layer. Eight values reserved. + FunctionConstantLayerFlags = 18, + /// Number of weights in the FVF vertex layout. 
+ FunctionConstantNumWeights = 26, }; enum plMetalLayerPassType: uint8_t diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal index 80db38f3a4..58f369cd23 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -92,7 +92,7 @@ typedef struct { } vs_WaveDev1Lay_7InOut; vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], - constant vs_WaveDev1Lay_7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant vs_WaveDev1Lay_7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_WaveDev1Lay_7InOut out; // Store our input position in world space in r6 float4 worldPosition = float4(0); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index bf19dfee30..78be7563fd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -60,7 +60,7 @@ typedef struct { } vs_WaveDecEnv7InOut; vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], - constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_WaveDecEnv7InOut out; // Store our input position in world space in r6 diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal index 381affab6e..73ee6a44b2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal @@ -89,7 +89,7 @@ typedef struct { } waveRipInOut; vertex waveRipInOut vs_WaveRip7(Vertex in 
[[stage_in]], - constant vs_WaveRip7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant vs_WaveRip7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { waveRipInOut out; // Store our input position in world space in r6 diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index c4ccd6de0c..a587a56473 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -94,7 +94,7 @@ typedef struct { } vs_WaveFixedFin7InOut; vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], - constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(BufferIndexUniforms) ]]) { + constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_WaveFixedFin7InOut out; // Store our input position in world space in r6 diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp index e411fb6831..c229751f7f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp @@ -73,7 +73,7 @@ bool plMetalFragmentShader::ISetConstants(plMetalPipeline* pipe) if( fOwner->GetNumConsts() ) { float *ptr = (float *)fOwner->GetConstBasePtr(); - pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, BufferIndexUniforms); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); } return true; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 
b5b1425d3b..2ed49ceb59 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -150,7 +150,7 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *en IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer); } - encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, BufferIndexFragArgBuffer); + encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, FragmentShaderArgumentUniforms); } void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription* passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) @@ -180,7 +180,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encode return layer; }); - encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), BufferIndexFragArgBuffer); + encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), FragmentShaderArgumentUniforms); } void plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform) { @@ -216,7 +216,7 @@ void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encode if (plCubicEnvironmap::ConvertNoRef(layer->GetTexture()) != nullptr) { } else if (plMipmap::ConvertNoRef(layer->GetTexture()) != nullptr || plRenderTarget::ConvertNoRef(layer->GetTexture()) != nullptr) { - encoder->setFragmentTexture(texRef->fTexture, Texture); + encoder->setFragmentTexture(texRef->fTexture, FragmentShaderArgumentTexture); } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 008cec07e2..b8e3c346ee 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -151,12 +151,12 @@ bool plRenderTriListFunc::RenderPrims() const plProfile_Inc(DrawPrimStatic); size_t uniformsSize = offsetof(VertexUniforms, uvTransforms) + sizeof(UVOutDescriptor) * fDevice->fPipeline->fCurrNumLayers; - fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), BufferIndexState); + fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); plMetalLights* lights = &fDevice->fPipeline->fLights; size_t lightSize = offsetof(plMetalLights, lampSources) + (sizeof(plMetalShaderLightSource) * lights->count); - fDevice->CurrentRenderCommandEncoder()->setVertexBytes(lights, sizeof(plMetalLights), BufferIndexLights); + fDevice->CurrentRenderCommandEncoder()->setVertexBytes(lights, sizeof(plMetalLights), VertexShaderArgumentLights); fDevice->CurrentRenderCommandEncoder()->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, fNumTris, MTL::IndexTypeUInt16, fDevice->fCurrentIndexBuffer, (sizeof(uint16_t) * fIStart)); } @@ -1133,7 +1133,7 @@ void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& { matrix_float4x4 mat; hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix+1), &mat); - fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), BufferIndexBlendMatrix1); + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), VertexShaderArgumentBlendMatrix1); } fCurrentRenderPassUniforms->projectionMatrix = fDevice.fMatrixProj; @@ -2615,7 +2615,7 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) //FIXME: Hacking the old texture drawing into the plate path mRef->prepareTextures(fDevice.CurrentRenderCommandEncoder(), 0); - fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), BufferIndexState); + 
fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); pm->EncodeDraw(fDevice.CurrentRenderCommandEncoder()); @@ -4009,7 +4009,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con fShadows[i]->fSelfShadowOn = selfShadowNow; } - fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&shadowState, sizeof(shadowState), VertexShaderArgumentIndexShadowState); + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&shadowState, sizeof(shadowState), VertexShaderArgumentShadowState); #ifndef PLASMA_EXTERNAL_RELEASE if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply)) @@ -4081,7 +4081,7 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm; } - fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowAlphaSrc); + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowCastAlphaSrc); } // ISetShadowLightState ////////////////////////////////////////////////////////////////// @@ -4144,7 +4144,7 @@ void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) plMetalShadowCastFragmentShaderArgumentBuffer uniforms; uniforms.pointLightCast = slave->fView.GetOrthogonal() ? 
false : true; - fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&uniforms, sizeof(plMetalShadowCastFragmentShaderArgumentBuffer), BufferIndexShadowCastFragArgBuffer); + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&uniforms, sizeof(plMetalShadowCastFragmentShaderArgumentBuffer), FragmentShaderArgumentShadowCastUniforms); hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w; simd_float4x4 tXfm; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp index afbc76f963..0f9a94c507 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp @@ -73,7 +73,7 @@ bool plMetalVertexShader::ISetConstants(plMetalPipeline* pipe) if( fOwner->GetNumConsts() ) { float *ptr = (float *)fOwner->GetConstBasePtr(); - pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, BufferIndexUniforms); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); } return true; From f0041e9ff218f6553d7e20ad44c68e16430e9cc1 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 15 Sep 2023 16:20:52 -0700 Subject: [PATCH 136/165] Fixing network error when status URL is not present --- Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm index 2f75d32d7d..f1e34e841a 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm @@ -64,6 +64,13 @@ - (void)loadServerStatus { NSString* urlString = [NSString stringWithSTString:GetServerStatusUrl()]; NSURL* url = [NSURL 
URLWithString:urlString]; + + if (!url || url.host == nil) + { + self.serverStatusString = @""; + return; + } + NSURLSessionConfiguration* URLSessionConfiguration = [NSURLSessionConfiguration ephemeralSessionConfiguration]; NSURLSession* session = [NSURLSession sessionWithConfiguration:URLSessionConfiguration From 99d3c98b6fe8a2e8dec5c48f94630086432d14e0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 23 Sep 2023 14:11:10 -0700 Subject: [PATCH 137/165] Warning cleanup/general code cleanup --- .../ShaderSrc/FixedPipelineShaders.metal | 4 +- .../pfMetalPipeline/plMetalDevice.cpp | 24 ++++--- .../pfMetalPipeline/plMetalDeviceRef.h | 4 +- .../plMetalMaterialShaderRef.cpp | 17 ++--- .../plMetalMaterialShaderRef.h | 8 +-- .../pfMetalPipeline/plMetalPipeline.cpp | 69 ++++++++++--------- .../pfMetalPipeline/plMetalPipeline.h | 2 +- .../pfMetalPipeline/plMetalPipelineState.h | 1 + 8 files changed, 68 insertions(+), 61 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index c69f772a4d..e6c310aca1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -119,7 +119,7 @@ constant const bool hasCubicTexture6 = (sourceType6 == PassTypeCubicTexture && h constant const bool hasCubicTexture7 = (sourceType7 == PassTypeCubicTexture && hasLayer7); constant const bool hasCubicTexture8 = (sourceType8 == PassTypeCubicTexture && hasLayer8); -typedef struct { +struct FragmentShaderArguments { texture2d textures [[ texture(FragmentShaderArgumentAttributeTextures), function_constant(has2DTexture1) ]]; texture2d texture2 [[ texture(FragmentShaderArgumentAttributeTextures + 1), function_constant(has2DTexture2) ]]; texture2d texture3 [[ texture(FragmentShaderArgumentAttributeTextures + 2), function_constant(has2DTexture3) ]]; @@ 
-147,7 +147,7 @@ typedef struct { sampler sampler6 [[ sampler(5), function_constant(hasLayer6) ]]; sampler sampler7 [[ sampler(6), function_constant(hasLayer7) ]]; sampler sampler8 [[ sampler(7), function_constant(hasLayer8) ]]; -} FragmentShaderArguments; +}; typedef struct { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index e8a553fef0..0526b3a3ef 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -546,9 +546,8 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr uint8_t* srcVPtr = group->GetVertBufferData(idx); plGBufferColor* const srcCPtr = group->GetColorBufferData(idx); - const int numCells = group->GetNumCells(idx); - int i; - for (i = 0; i < numCells; i++) + const size_t numCells = group->GetNumCells(idx); + for (size_t i = 0; i < numCells; i++) { plGBufferCell* cell = group->GetCell(idx, i); @@ -789,8 +788,8 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *i #define HACK_LEVEL_SIZE 1 #if HACK_LEVEL_SIZE - uint width = tRef->fTexture->width(); - uint height = tRef->fTexture->height(); + NS::UInteger width = tRef->fTexture->width(); + NS::UInteger height = tRef->fTexture->height(); #endif if (tRef->fLevels == -1) { @@ -801,11 +800,11 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *i for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { img->SetCurrLevel(lvl); #if HACK_LEVEL_SIZE - uint levelWidth = (width / exp2(lvl)); - uint levelHeight = (height / exp2(lvl)); + NS::UInteger levelWidth = (width / exp2(lvl)); + NS::UInteger levelHeight = (height / exp2(lvl)); #else - uint levelWidth = img->GetCurrWidth(); - uint levelHeight = img->GetCurrHeight(); + NS::UInteger levelWidth = img->GetCurrWidth(); + NS::UInteger levelHeight = img->GetCurrHeight(); #endif switch 
(img->fDirectXInfo.fCompressionType) { @@ -867,6 +866,7 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* im } tRef->fLevels = img->GetNumLevels() - 1; + //FIXME: Is this texture check actually needed //if(!tRef->fTexture) { ConfigureAllowedLevels(tRef, img); @@ -1177,14 +1177,15 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->commit(); - //as we more tightly manage resource sync we may be able to avoid waiting for the frame to complete - //fCurrentCommandBuffer->waitUntilCompleted(); fCurrentCommandBuffer->release(); fCurrentCommandBuffer = nil; fCurrentDrawable->release(); fCurrentDrawable = nil; + // Reset the clear colors for the next pass + // Metal clears on framebuffer load - so don't cause a clear + // command in this pass to affect the next pass. fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); fClearDrawableColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); fShouldClearRenderTarget = false; @@ -1286,6 +1287,7 @@ CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() void plMetalDevice::BlitTexture(MTL::Texture* src, MTL::Texture* dst) { + //FIXME: BlitTexture current unused - this used to create private GPU only textures through a copy from a CPU texture. 
if (fBlitCommandEncoder == nullptr) { fBlitCommandBuffer = fCommandQueue->commandBuffer()->retain(); //enqueue so we go to the front of the line before render diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index 4a58756a9a..e6d69ca942 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -107,7 +107,7 @@ class plMetalBufferPoolRef : public plMetalDeviceRef { } //update the current buffer focused, if the is no buffer to focus set it to null - uint currentSize = fBuffers[fCurrentFrame].size(); + uint32_t currentSize = uint32_t(fBuffers[fCurrentFrame].size()); if(fCurrentPass < currentSize) { fBuffer = fBuffers[fCurrentFrame][fCurrentPass]; } else { @@ -121,7 +121,7 @@ class plMetalBufferPoolRef : public plMetalDeviceRef { void SetBuffer(MTL::Buffer* buffer) { fBuffer = buffer->retain(); - uint currentSize = fBuffers[fCurrentFrame].size(); + uint32_t currentSize = uint32_t(fBuffers[fCurrentFrame].size()); //if the current vector doesn't have enough room for the entry, resize it if(fCurrentPass >= currentSize) { fBuffers[fCurrentFrame].resize(++currentSize); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 2ed49ceb59..c4ef73a418 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -123,8 +123,7 @@ void plMetalMaterialShaderRef::CheckMateralRef() //fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on the GPU so it's much faster. 
Use this encoder if there are no piggybacks or pushover layers void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass) { - size_t i = 0; - for (i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { + for (uint32_t i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { plLayerInterface* layer = fMaterial->GetLayer(i); if (!layer) { @@ -222,12 +221,11 @@ void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encode void plMetalMaterialShaderRef::ILoopOverLayers() { - size_t j = 0; - size_t pass = 0; + uint32_t pass = 0; - for (j = 0; j < fMaterial->GetNumLayers(); ) + for (uint32_t j = 0; j < fMaterial->GetNumLayers(); ) { - size_t currLayer = j; + uint32_t currLayer = j; //Create "fast encode" buffers //Fast encode can be used when there are no piggybacks or pushover layers. We'll load as much of the @@ -343,13 +341,12 @@ uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) return currNumLayers; } - int i; - int maxLayers = 8; + uint32_t maxLayers = 8; if (which + maxLayers > fMaterial->GetNumLayers()) { - maxLayers = fMaterial->GetNumLayers() - which; + maxLayers = uint32_t(fMaterial->GetNumLayers()) - which; } - for (i = currNumLayers; i < maxLayers; i++) { + for (uint32_t i = currNumLayers; i < maxLayers; i++) { plLayerInterface* lay = fMaterial->GetLayer(which + i); // Ignoring max UVW limit diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 814a2fda7c..97d69b2585 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -62,7 +62,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef //temporary holder for the fragment shader to use, we don't own this reference MTL::Function* fFragFunction; private: - 
std::vector fPassIndices; + std::vector fPassIndices; //FIXME: This should be retained/released MTL::Device* fDevice; std::vector fPassArgumentBuffers; @@ -77,9 +77,9 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void Release(); void CheckMateralRef(); - size_t GetNumPasses() const { return fNumPasses; } + uint32_t GetNumPasses() const { return fNumPasses; } - size_t GetPassIndex(size_t which) const { return fPassIndices[which]; } + uint32_t GetPassIndex(size_t which) const { return fPassIndices[which]; } const std::vector GetLayersForPass(size_t pass) { return fPasses[pass]; } void EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription *passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); @@ -87,7 +87,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef //probably not a good idea to call prepareTextures directly //mostly just a hack to keep plates working for now void prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass); - std::vector fPassLengths; + std::vector fPassLengths; // Set the current Plasma state based on the input layer state and the material overrides. // fMatOverOn overrides to set a state bit whether it is set in the layer or not. diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index b8e3c346ee..2c791761f2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -183,14 +183,8 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons // Won't hurt us unless we try to many things at once. 
fMaxPiggyBacks = fMaxLayersAtOnce >> 1; - // Less than 4 layers at once means we have to fallback on uv bumpmapping - if (fMaxLayersAtOnce < 4) - SetDebugFlag(plPipeDbg::kFlagBumpUV, true); - //plDynamicCamMap::SetCapable(false); - //plQuality::SetQuality(fDefaultPipeParams.VideoQuality); - //plQuality::SetCapability(fDefaultPipeParams.VideoQuality); + // Metal is always PS3 capable plQuality::SetCapability(plQuality::kPS_3); - //plShadowCaster::EnableShadowCast(false); fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); fDevice.SetMSAASampleCount(fInitialPipeParams.AntiAliasingAmount); @@ -261,27 +255,22 @@ bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector & return false; } - // Other stuff that we're ignoring for now... - plProfile_EndTiming(PrepDrawable); return true; } plTextFont *plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) { - plTextFont *font; - - - font = new plMetalTextFont( this, &fDevice ); - if (font == nullptr) - return nullptr; + plTextFont *font = new plMetalTextFont( this, &fDevice ); font->Create( face, size ); font->Link( &fTextFontRefList ); - return font; } -bool plMetalPipeline::OpenAccess(plAccessSpan &dst, plDrawableSpans *d, const plVertexSpan *span, bool readOnly) { return false; } +bool plMetalPipeline::OpenAccess(plAccessSpan &dst, plDrawableSpans *d, const plVertexSpan *span, bool readOnly) { + //FIXME: What's this? 
+ return false; +} bool plMetalPipeline::CloseAccess(plAccessSpan &acc) { return false; } @@ -381,12 +370,9 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) plCubicRenderTarget *cubicRT; // If we have Shader Model 3 and support non-POT textures, let's make reflections the pipe size -#if 1 if (plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner)) { - //if ((plQuality::GetCapability() > plQuality::kPS_2) && fSettings.fD3DCaps & kCapsNpotTextures) - camMap->ResizeViewport(IGetViewTransform()); + camMap->ResizeViewport(IGetViewTransform()); } -#endif /// Check--is this renderTarget really a child of a cubicRenderTarget? if (owner->GetParent()) { @@ -410,6 +396,9 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) owner->GetHeight(), false); if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { + // on Apple Silicon GPUs - don't allocate memory to back the render target + // this assumes the render target only needs to survive this render pass + //FIXME: Do we need to promise the output survives the render pass? 
depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); @@ -609,6 +598,8 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) bool plMetalPipeline::BeginRender() { + // leaking is bad - create an autorelease pool to dispose + // of autoreleased Metal resources at the end of the pass fCurrentPool = NS::AutoreleasePool::alloc()->init(); // offset transform RefreshScreenMatrices(); @@ -638,6 +629,7 @@ bool plMetalPipeline::BeginRender() drawable->release(); /// If we have a renderTarget active, use its viewport + //FIXME: New drawables should inherit existing viewport //fDevice.SetViewport(); } @@ -872,6 +864,12 @@ bool plMetalPipeline::SetGamma(const uint16_t *const tabR, const uint16_t *const fDevice.fGammaLUTTexture = nullptr; } + /* + Plasma has multiple types of gamma corrections it can do - and the engine reserves + the right to create any color correct LUT. Ugh. Load the LUT into a texture as 8 bit + per channel data. The Metal renderer supports up to 10 bit colors - but it can subsample + the texture to interpolate the colors in between what the LUT defines. + */ MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); texDescriptor->setTextureType(MTL::TextureType1DArray); texDescriptor->setWidth(256); @@ -895,6 +893,13 @@ bool plMetalPipeline::SetGamma10(const uint16_t *const tabR, const uint16_t *con fDevice.fGammaLUTTexture = nullptr; } + /* + Loads in a real 10 bit color LUT for fancy displays. This LUT contains + way more data - but the shader doesn't care. The shader does an x lookup + by normalized co-ordinate - not value. So the width of the texture can + vary. 
+ */ + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); texDescriptor->setTextureType(MTL::TextureType1DArray); texDescriptor->setWidth(1024); @@ -913,6 +918,7 @@ bool plMetalPipeline::SetGamma10(const uint16_t *const tabR, const uint16_t *con bool plMetalPipeline::CaptureScreen(plMipmap *dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight) { //FIXME: Screen capture + //FIXME: Double fix me - wasn't this working? return false; } @@ -937,7 +943,7 @@ plMipmap *plMetalPipeline::ExtractMipMap(plRenderTarget *targ) plMipmap* mipMap = new plMipmap(width, height, plMipmap::kARGB32Config, 1); uint8_t* ptr = (uint8_t*)(ref->fTexture->buffer()->contents()); - const int pitch = ref->fTexture->width() * 4; + const NS::UInteger pitch = ref->fTexture->width() * 4; ref->fTexture->getBytes(mipMap->GetAddr32(0, 0), pitch, MTL::Region(0, 0, width, height), 0); @@ -1200,7 +1206,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, IPushPiggyBacks(material); hsRefCnt_SafeAssign(fCurrMaterial, material); - size_t pass; + uint32_t pass; for (pass = 0; pass < mRef->GetNumPasses(); pass++) { if ( IHandleMaterialPass(material, pass, &span, vRef) ) { @@ -1489,8 +1495,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) plRenderTriListFunc render(&fDevice, 0, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength); - size_t pass; - for (pass = 0; pass < mRef->GetNumPasses(); pass++) { + for (int32_t pass = 0; pass < mRef->GetNumPasses(); pass++) { IHandleMaterialPass(material, pass, &span, vRef); if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) { @@ -1636,7 +1641,7 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, std::vector& spanLights = currSpan->GetLightList(false); - int numActivePiggyBacks = 0; + size_t numActivePiggyBacks = 0; if( !(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & 
hsGMatState::kShadeEmissive) ) { /// Tack lightmap onto last stage if we have one @@ -1700,7 +1705,7 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, // Note that the lighting pipe constants are NOT implemented. void plMetalPipeline::ISetPipeConsts(plShader* shader) { - int n = shader->GetNumPipeConsts(); + size_t n = shader->GetNumPipeConsts(); int i; for( i = 0; i < n; i++ ) { @@ -1906,6 +1911,8 @@ void plMetalPipeline::ISetPipeConsts(plShader* shader) case plPipeConst::kPointLight2: case plPipeConst::kPointLight3: case plPipeConst::kPointLight4: + case plPipeConst::kColorFilter: + case plPipeConst::kMaxType: break; } } @@ -2396,7 +2403,7 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj) { const size_t numLights = kMetalMaxLightCount; - size_t i = 0; + int32_t i = 0; int32_t startScale; float threshhold; float overHold = 0.3; @@ -2735,7 +2742,7 @@ void plMetalPipeline::IPushProjPiggyBack(plLayerInterface* li) return; fPiggyBackStack.push_back(li); - fActivePiggyBacks = fPiggyBackStack.size() - fMatPiggyBacks; + fActivePiggyBacks = uint32_t(fPiggyBackStack.size()) - fMatPiggyBacks; fForceMatHandle = true; } @@ -2806,9 +2813,9 @@ void plMetalPipeline::IPopPiggyBacks() // ISetNumActivePiggyBacks ///////////////////////////////////////////// // Calculate the number of active piggy backs. 
-int plMetalPipeline::ISetNumActivePiggyBacks() +size_t plMetalPipeline::ISetNumActivePiggyBacks() { - return fActivePiggyBacks = std::min(static_cast(fMaxPiggyBacks), fPiggyBackStack.size()); + return fActivePiggyBacks = std::min(fMaxPiggyBacks, uint32_t(fPiggyBackStack.size())); } struct plAVTexVert { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 8a060a6f68..27ee222bad 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -206,7 +206,7 @@ class plMetalPipeline : public pl3DPipeline void IPopPiggyBacks(); void IPushProjPiggyBack(plLayerInterface* li); void IPopProjPiggyBacks(); - int ISetNumActivePiggyBacks(); + size_t ISetNumActivePiggyBacks(); bool ICheckAuxBuffers(const plAuxSpan* span); void ISetPipeConsts(plShader* shader); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 8b3acd6dd8..73e867d76c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -73,6 +73,7 @@ class plMetalPipelineState { virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) = 0; + virtual ~plMetalPipelineState() = default; protected: plMetalDevice* fDevice; virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const = 0; From fbffe1727c85a712da6b5462ac4e7eaeab9552fa Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 1 Oct 2023 21:16:49 -0700 Subject: [PATCH 138/165] Re-enabling the light prioritization path --- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 2c791761f2..53aa2b437e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -2400,6 +2400,10 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye } } +// ISelectLights /////////////////////////////////////////////////////////////// +// Find the strongest numLights lights to illuminate the span with. +// Weaker lights are faded out in effect so they won't pop when the +// strongest N changes membership. void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj) { const size_t numLights = kMetalMaxLightCount; @@ -2433,12 +2437,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef /// fade them out to nothing as they get closer to the bottom. This way, they fade /// out of existence instead of pop out. - //FIXME: In Metal, I'm not sure what this is doing. These lights won't be visible, and visible lights are always fully scaled. - // Note from the DX version of the source: - // Find the strongest numLights lights to illuminate the span with. - // Weaker lights are faded out in effect so they won't pop when the - // strongest N changes membership. 
- /*if (i < spanLights.size() - 1 && i > 0) { + if (i < spanLights.size() - 1 && i > 0) { threshhold = span->GetLightStrength(i, proj); i--; overHold = threshhold * 1.5f; @@ -2453,8 +2452,7 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef IScaleLight(i, (1 - scale) * span->GetLightScale(i, proj)); } startScale = i + 1; - }*/ - + } /// Make sure those lights that aren't scaled....aren't for (i = 0; i < startScale; i++) { From 156fdbd586faca29a7cd5371a6e06a252d9363c8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 15 Oct 2023 14:48:28 -0700 Subject: [PATCH 139/165] Adding clang-format --- .../FeatureLib/pfMetalPipeline/.clang-format | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format new file mode 100644 index 0000000000..57db9a19a7 --- /dev/null +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format @@ -0,0 +1,33 @@ +--- +BasedOnStyle: Google +--- +Language: Cpp +# Modifications to the style for Plasma go here +IndentWidth: 4 +ColumnLimit: 0 +BraceWrapping: + AfterClass: true + AfterControlStatement: MultiLine + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: false + BeforeElse: false + IndentBraces: false +BreakBeforeBraces: Custom +AlignConsecutiveDeclarations: AcrossComments +AlignConsecutiveAssignments: + Enabled: False +ReflowComments: false +LambdaBodyIndentation: OuterScope +AllowShortBlocksOnASingleLine: true +AllowShortFunctionsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +PackConstructorInitializers: CurrentLine +ObjCBlockIndentWidth: 4 +--- +Language: ObjC +# Obj-C specific settings go here From b5f47c13fc496e22569436f4ea6fd7526fb998b8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 
18 Oct 2023 19:22:41 -0700 Subject: [PATCH 140/165] Updating code to match style --- .../FeatureLib/pfMetalPipeline/.clang-format | 12 +- .../pfMetalPipeline/plMetalDevice.cpp | 790 +++--- .../pfMetalPipeline/plMetalDevice.h | 245 +- .../plMetalDevicePerformanceShaders.mm | 93 +- .../pfMetalPipeline/plMetalDeviceRef.cpp | 19 +- .../pfMetalPipeline/plMetalDeviceRef.h | 227 +- .../pfMetalPipeline/plMetalDeviceRefs.cpp | 30 +- .../pfMetalPipeline/plMetalEnumerate.mm | 80 +- .../pfMetalPipeline/plMetalFragmentShader.cpp | 22 +- .../pfMetalPipeline/plMetalFragmentShader.h | 10 +- .../plMetalMaterialShaderRef.cpp | 242 +- .../plMetalMaterialShaderRef.h | 81 +- .../pfMetalPipeline/plMetalPipeline.cpp | 2339 ++++++++--------- .../pfMetalPipeline/plMetalPipeline.h | 299 +-- .../pfMetalPipeline/plMetalPipelineState.cpp | 312 +-- .../pfMetalPipeline/plMetalPipelineState.h | 332 +-- .../pfMetalPipeline/plMetalPlateManager.cpp | 57 +- .../pfMetalPipeline/plMetalPlateManager.h | 38 +- .../pfMetalPipeline/plMetalShader.cpp | 16 +- .../pfMetalPipeline/plMetalShader.h | 27 +- .../pfMetalPipeline/plMetalTextFont.cpp | 135 +- .../pfMetalPipeline/plMetalTextFont.h | 61 +- .../pfMetalPipeline/plMetalVertexShader.cpp | 22 +- .../pfMetalPipeline/plMetalVertexShader.h | 10 +- 24 files changed, 2654 insertions(+), 2845 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format index 57db9a19a7..dcb3b2f163 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format @@ -4,10 +4,12 @@ BasedOnStyle: Google Language: Cpp # Modifications to the style for Plasma go here IndentWidth: 4 +AccessModifierOffset: -4 ColumnLimit: 0 +BreakBeforeBraces: Custom BraceWrapping: AfterClass: true - AfterControlStatement: MultiLine + AfterControlStatement: Never AfterEnum: true AfterFunction: true AfterNamespace: true @@ -17,17 +19,21 @@ BraceWrapping: 
BeforeCatch: false BeforeElse: false IndentBraces: false -BreakBeforeBraces: Custom +IndentAccessModifiers: false AlignConsecutiveDeclarations: AcrossComments AlignConsecutiveAssignments: Enabled: False -ReflowComments: false +ReflowComments: true LambdaBodyIndentation: OuterScope AllowShortBlocksOnASingleLine: true AllowShortFunctionsOnASingleLine: true AllowShortIfStatementsOnASingleLine: true PackConstructorInitializers: CurrentLine ObjCBlockIndentWidth: 4 +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesBeforeTrailingComments: 1 --- Language: ObjC # Obj-C specific settings go here diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 0526b3a3ef..0e99c550f0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -43,83 +43,77 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #ifndef plMetalDevice_hpp #define plMetalDevice_hpp -//We need to define these once for Metal somewhere in a cpp file +// We need to define these once for Metal somewhere in a cpp file #define NS_PRIVATE_IMPLEMENTATION #define CA_PRIVATE_IMPLEMENTATION #define MTL_PRIVATE_IMPLEMENTATION -#include #include "plMetalDevice.h" -#include "plMetalPipeline.h" -#include "ShaderTypes.h" +#include +#include "ShaderTypes.h" #include "hsThread.h" #include "plDrawable/plGBufferGroup.h" -#include "plGImage/plMipmap.h" #include "plGImage/plCubicEnvironmap.h" -#include "plPipeline/plRenderTarget.h" - +#include "plGImage/plMipmap.h" +#include "plMetalPipeline.h" #include "plMetalPipelineState.h" +#include "plPipeline/plRenderTarget.h" matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst) { - if (src.fFlags & hsMatrix44::kIsIdent) - { + if (src.fFlags & hsMatrix44::kIsIdent) { memcpy(dst, &matrix_identity_float4x4, sizeof(float) * 16); - } - else - { + } else { memcpy(dst, &src.fMap, 
sizeof(matrix_float4x4)); } return dst; } - bool plMetalDevice::InitDevice() { - //FIXME: Should Metal adopt InitDevice like OGL? + // FIXME: Should Metal adopt InitDevice like OGL? hsAssert(0, "InitDevice not implemented for Metal rendering"); } void plMetalDevice::Shutdown() { - //FIXME: Should Metal adopt Shutdown like OGL? + // FIXME: Should Metal adopt Shutdown like OGL? hsAssert(0, "Shutdown not implemented for Metal rendering"); } - void plMetalDevice::SetMaxAnsiotropy(uint8_t maxAnsiotropy) { - //setup the material pass samplers - //load them all at once and then let the shader pick - + // setup the material pass samplers + // load them all at once and then let the shader pick + if (maxAnsiotropy == 0) maxAnsiotropy = 1; - - if(fSamplerStates[0] != nullptr) { + + if (fSamplerStates[0] != nullptr) { ReleaseSamplerStates(); } - - MTL::SamplerDescriptor *samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); + + MTL::SamplerDescriptor* samplerDescriptor = MTL::SamplerDescriptor::alloc()->init(); samplerDescriptor->setMaxAnisotropy(maxAnsiotropy); samplerDescriptor->setMinFilter(MTL::SamplerMinMagFilterLinear); samplerDescriptor->setMagFilter(MTL::SamplerMinMagFilterLinear); samplerDescriptor->setMipFilter(MTL::SamplerMipFilterLinear); - + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeRepeat); samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeRepeat); fSamplerStates[0] = fMetalDevice->newSamplerState(samplerDescriptor); - + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge); samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeRepeat); fSamplerStates[1] = fMetalDevice->newSamplerState(samplerDescriptor); - + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeRepeat); samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge); fSamplerStates[2] = fMetalDevice->newSamplerState(samplerDescriptor); - + samplerDescriptor->setSAddressMode(MTL::SamplerAddressModeClampToEdge); 
samplerDescriptor->setTAddressMode(MTL::SamplerAddressModeClampToEdge); fSamplerStates[3] = fMetalDevice->newSamplerState(samplerDescriptor); @@ -128,10 +122,10 @@ void plMetalDevice::SetMaxAnsiotropy(uint8_t maxAnsiotropy) void plMetalDevice::SetMSAASampleCount(uint8_t sampleCount) { - //Plasma has some MSAA levels that don't completely correspond to what Metal can do - //Best fit them to levels Metal can do. Once they are best fit see if the hardware - //is capable. - + // Plasma has some MSAA levels that don't completely correspond to what Metal can do + // Best fit them to levels Metal can do. Once they are best fit see if the hardware + // is capable. + uint8_t actualSampleCount = 1; if (sampleCount == 6) { actualSampleCount = 8; @@ -140,14 +134,14 @@ void plMetalDevice::SetMSAASampleCount(uint8_t sampleCount) } else if (sampleCount == 2) { actualSampleCount = 2; } - + while (actualSampleCount != 1) { if (fMetalDevice->supportsTextureSampleCount(actualSampleCount)) { break; } actualSampleCount /= 2; } - + fSampleCount = actualSampleCount; } @@ -155,47 +149,46 @@ void plMetalDevice::ReleaseSamplerStates() { fSamplerStates[0]->release(); fSamplerStates[0] = nullptr; - + fSamplerStates[1]->release(); fSamplerStates[1] = nullptr; - + fSamplerStates[2]->release(); fSamplerStates[2] = nullptr; - + fSamplerStates[3]->release(); fSamplerStates[3] = nullptr; } -void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth) { - - //Plasma may clear a target and draw at different times. - //This is specifically trouble with the drawable clear - //Plasma might clear the drawable, and then go off and do - //off screen stuff. Metal doesn't work that way, we need to - //draw and clear at the same time. So if it's a clear for the - //current drawable, remember that and perform the clear when - //we're actually drawing to screen. 
- +void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth) +{ + // Plasma may clear a target and draw at different times. + // This is specifically trouble with the drawable clear + // Plasma might clear the drawable, and then go off and do + // off screen stuff. Metal doesn't work that way, we need to + // draw and clear at the same time. So if it's a clear for the + // current drawable, remember that and perform the clear when + // we're actually drawing to screen. + if (fCurrentRenderTargetCommandEncoder) { half4 halfClearColor; halfClearColor[0] = clearColor.r; halfClearColor[1] = clearColor.g; halfClearColor[2] = clearColor.b; halfClearColor[3] = clearColor.a; - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalClearPipelineState(this, shouldClearColor, shouldClearDepth).GetRenderPipelineState(); - - const MTL::RenderPipelineState *pipelineState = linkedPipeline->pipelineState; + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalClearPipelineState(this, shouldClearColor, shouldClearDepth).GetRenderPipelineState(); + + const MTL::RenderPipelineState* pipelineState = linkedPipeline->pipelineState; CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); - + float clearCoords[8] = { -1, -1, 1, -1, -1, 1, - 1, 1 - }; + 1, 1}; float clearDepth = 1.0f; CurrentRenderCommandEncoder()->setDepthStencilState(fNoZReadStencilState); - + CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); CurrentRenderCommandEncoder()->setVertexBytes(&clearCoords, sizeof(clearCoords), 0); CurrentRenderCommandEncoder()->setFragmentBytes(&halfClearColor, sizeof(halfClearColor), 0); @@ -218,30 +211,29 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh } } } - } -void plMetalDevice::BeginNewRenderPass() { - - //printf("Beginning new render pass\n"); - - //lazilly create the screen render encoder if it does not yet exist +void 
plMetalDevice::BeginNewRenderPass() +{ + // printf("Beginning new render pass\n"); + + // lazilly create the screen render encoder if it does not yet exist if (!fCurrentOffscreenCommandBuffer && !fCurrentRenderTargetCommandEncoder) { SetRenderTarget(NULL); } - + if (fCurrentRenderTargetCommandEncoder) { - //if we have an existing render target, submit it's commands and release it - //if we need to come back to this render target, we can always create a new render - //pass descriptor and submit more commands + // if we have an existing render target, submit it's commands and release it + // if we need to come back to this render target, we can always create a new render + // pass descriptor and submit more commands fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; } - - MTL::RenderPassDescriptor *renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); + + MTL::RenderPassDescriptor* renderPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); - + if (fCurrentRenderTarget) { renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearRenderTargetColor.x, fClearRenderTargetColor.y, fClearRenderTargetColor.z, fClearRenderTargetColor.w)); if (fShouldClearRenderTarget) { @@ -249,17 +241,17 @@ void plMetalDevice::BeginNewRenderPass() { } else { renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); } - - if ( fCurrentRenderTarget->GetZDepth() ) { - plMetalRenderTargetRef* deviceTarget= (plMetalRenderTargetRef *)fCurrentRenderTarget->GetDeviceRef(); + + if (fCurrentRenderTarget->GetZDepth()) { + plMetalRenderTargetRef* deviceTarget = (plMetalRenderTargetRef*)fCurrentRenderTarget->GetDeviceRef(); renderPassDescriptor->depthAttachment()->setTexture(deviceTarget->fDepthBuffer); 
renderPassDescriptor->depthAttachment()->setClearDepth(fClearRenderTargetDepth); renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); } - + renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentOutputTexture); - + fCurrentRenderTargetCommandEncoder = fCurrentOffscreenCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } else { renderPassDescriptor->colorAttachments()->object(0)->setClearColor(MTL::ClearColor(fClearDrawableColor.x, fClearDrawableColor.y, fClearDrawableColor.z, fClearDrawableColor.w)); @@ -268,13 +260,12 @@ void plMetalDevice::BeginNewRenderPass() { } else { renderPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionLoad); } - + renderPassDescriptor->depthAttachment()->setClearDepth(fClearDrawableDepth); renderPassDescriptor->depthAttachment()->setLoadAction(MTL::LoadActionClear); renderPassDescriptor->depthAttachment()->setTexture(fCurrentDrawableDepthTexture); renderPassDescriptor->depthAttachment()->setStoreAction(MTL::StoreActionDontCare); - - + if (fSampleCount == 1) { if (NeedsPostprocessing()) { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentUnprocessedOutputTexture); @@ -283,21 +274,21 @@ void plMetalDevice::BeginNewRenderPass() { } } else { renderPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentFragmentMSAAOutputTexture); - - //if we need postprocessing, output to the main pass texture - //otherwise we can go straight to the drawable + + // if we need postprocessing, output to the main pass texture + // otherwise we can go straight to the drawable if (NeedsPostprocessing()) { renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentUnprocessedOutputTexture); } else { renderPassDescriptor->colorAttachments()->object(0)->setResolveTexture(fCurrentFragmentOutputTexture); } - + 
renderPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionMultisampleResolve); } - + fCurrentRenderTargetCommandEncoder = fCurrentCommandBuffer->renderCommandEncoder(renderPassDescriptor)->retain(); } - + fCurrentRenderTargetCommandEncoder->setFragmentSamplerStates(fSamplerStates, NS::Range::Make(0, 4)); } @@ -309,54 +300,54 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) We used to allow starting new passes on the same drawable but that would break memoryless buffers on Apple Silicon that don't survive between passes. */ - if((!fCurrentRenderTarget && !target) && fCurrentRenderTargetCommandEncoder) { + if ((!fCurrentRenderTarget && !target) && fCurrentRenderTargetCommandEncoder) { return; } - if( fCurrentRenderTargetCommandEncoder ) { - //if we have an existing render target, submit it's commands and release it - //if we need to come back to this render target, we can always create a new render - //pass descriptor and submit more commands + if (fCurrentRenderTargetCommandEncoder) { + // if we have an existing render target, submit it's commands and release it + // if we need to come back to this render target, we can always create a new render + // pass descriptor and submit more commands fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; } - - if( fCurrentOffscreenCommandBuffer ) { + + if (fCurrentOffscreenCommandBuffer) { if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) { - //if our target was offscreen, go ahead and blit back. Something will want this data. + // if our target was offscreen, go ahead and blit back. Something will want this data. 
MTL::BlitCommandEncoder* blitEncoder = fCurrentOffscreenCommandBuffer->blitCommandEncoder(); blitEncoder->synchronizeResource(fCurrentFragmentOutputTexture); blitEncoder->endEncoding(); } fCurrentOffscreenCommandBuffer->commit(); if (fCurrentRenderTarget && fCurrentRenderTarget->GetFlags() & plRenderTarget::kIsOffscreen) { - //if it's an offscreen buffer, wait for completion - //something is probably going to want to syncronously grab data + // if it's an offscreen buffer, wait for completion + // something is probably going to want to syncronously grab data fCurrentOffscreenCommandBuffer->waitUntilCompleted(); } fCurrentOffscreenCommandBuffer->release(); fCurrentOffscreenCommandBuffer = nil; } - + fCurrentRenderTarget = target; - - if ( fCurrentRenderTarget && fShouldClearRenderTarget == false ) { + + if (fCurrentRenderTarget && fShouldClearRenderTarget == false) { // clear if a clear color wasn't already set fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); fShouldClearRenderTarget = true; fClearRenderTargetDepth = 1.0; } - - if(fCurrentRenderTarget) { - if(!target->GetDeviceRef()) { + + if (fCurrentRenderTarget) { + if (!target->GetDeviceRef()) { fPipeline->MakeRenderTargetRef(target); } - plMetalRenderTargetRef *deviceTarget= (plMetalRenderTargetRef *)target->GetDeviceRef(); + plMetalRenderTargetRef* deviceTarget = (plMetalRenderTargetRef*)target->GetDeviceRef(); fCurrentOffscreenCommandBuffer = fCommandQueue->commandBuffer(); fCurrentOffscreenCommandBuffer->retain(); fCurrentFragmentOutputTexture = deviceTarget->fTexture; - - if(deviceTarget->fDepthBuffer) { + + if (deviceTarget->fDepthBuffer) { fCurrentDepthFormat = MTL::PixelFormatDepth32Float_Stencil8; } else { fCurrentDepthFormat = MTL::PixelFormatInvalid; @@ -368,111 +359,112 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) } plMetalDevice::plMetalDevice() -: fErrorMsg(nullptr), - fActiveThread(hsThread::ThisThreadHash()), - fCurrentDrawable(nullptr), - 
fCommandQueue(nullptr), - fCurrentRenderTargetCommandEncoder(nullptr), - fCurrentDrawableDepthTexture(nullptr), - fCurrentFragmentOutputTexture(nullptr), - fCurrentCommandBuffer(nullptr), - fCurrentOffscreenCommandBuffer(nullptr), - fCurrentRenderTarget(nullptr), - fNewPipelineStateMap(), - fCurrentFragmentMSAAOutputTexture(nullptr), - fCurrentUnprocessedOutputTexture(nullptr), - fGammaLUTTexture(nullptr), - fGammaAdjustState(nullptr), - fBlitCommandBuffer(nullptr), - fBlitCommandEncoder(nullptr) - { + : fErrorMsg(nullptr), + fActiveThread(hsThread::ThisThreadHash()), + fCurrentDrawable(nullptr), + fCommandQueue(nullptr), + fCurrentRenderTargetCommandEncoder(nullptr), + fCurrentDrawableDepthTexture(nullptr), + fCurrentFragmentOutputTexture(nullptr), + fCurrentCommandBuffer(nullptr), + fCurrentOffscreenCommandBuffer(nullptr), + fCurrentRenderTarget(nullptr), + fNewPipelineStateMap(), + fCurrentFragmentMSAAOutputTexture(nullptr), + fCurrentUnprocessedOutputTexture(nullptr), + fGammaLUTTexture(nullptr), + fGammaAdjustState(nullptr), + fBlitCommandBuffer(nullptr), + fBlitCommandEncoder(nullptr) +{ fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; fSamplerStates[0] = nullptr; - + fMetalDevice = MTL::CreateSystemDefaultDevice(); fCommandQueue = fMetalDevice->newCommandQueue(); - - //set up all the depth stencil states - MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); - + + // set up all the depth stencil states + MTL::DepthStencilDescriptor* depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); depthDescriptor->setDepthWriteEnabled(true); depthDescriptor->setLabel(NS::String::string("No Z Read", NS::UTF8StringEncoding)); fNoZReadStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); - + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionLessEqual); 
depthDescriptor->setDepthWriteEnabled(false); depthDescriptor->setLabel(NS::String::string("No Z Write", NS::UTF8StringEncoding)); fNoZWriteStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); - + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); depthDescriptor->setDepthWriteEnabled(false); depthDescriptor->setLabel(NS::String::string("No Z Read or Write", NS::UTF8StringEncoding)); fNoZReadOrWriteStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); - + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionLessEqual); depthDescriptor->setLabel(NS::String::string("Z Read and Write", NS::UTF8StringEncoding)); depthDescriptor->setDepthWriteEnabled(true); fDefaultStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); - + depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionGreaterEqual); depthDescriptor->setLabel(NS::String::string("Reverse Z", NS::UTF8StringEncoding)); depthDescriptor->setDepthWriteEnabled(true); fReverseZStencilState = fMetalDevice->newDepthStencilState(depthDescriptor); - + depthDescriptor->release(); } -void plMetalDevice::SetViewport() { - CurrentRenderCommandEncoder()->setViewport({ (double)fPipeline->GetViewTransform().GetViewPortLeft(), - (double)fPipeline->GetViewTransform().GetViewPortTop(), - (double)fPipeline->GetViewTransform().GetViewPortWidth(), - (double)fPipeline->GetViewTransform().GetViewPortHeight(), - 0.f, 1.f }); +void plMetalDevice::SetViewport() +{ + CurrentRenderCommandEncoder()->setViewport({(double)fPipeline->GetViewTransform().GetViewPortLeft(), + (double)fPipeline->GetViewTransform().GetViewPortTop(), + (double)fPipeline->GetViewTransform().GetViewPortWidth(), + (double)fPipeline->GetViewTransform().GetViewPortHeight(), + 0.f, 1.f}); } -bool plMetalDevice::BeginRender() { +bool plMetalDevice::BeginRender() +{ if (fActiveThread == hsThread::ThisThreadHash()) { return true; } fActiveThread = hsThread::ThisThreadHash(); - + return true; } -static 
uint32_t IGetBufferFormatSize(uint8_t format) +static uint32_t IGetBufferFormatSize(uint8_t format) { - uint32_t size = sizeof( float ) * 6 + sizeof( uint32_t ) * 2; // Position and normal, and two packed colors + uint32_t size = sizeof(float) * 6 + sizeof(uint32_t) * 2; // Position and normal, and two packed colors - switch (format & plGBufferGroup::kSkinWeightMask) - { + switch (format & plGBufferGroup::kSkinWeightMask) { case plGBufferGroup::kSkinNoWeights: break; case plGBufferGroup::kSkin1Weight: size += sizeof(float); break; default: - hsAssert( false, "Invalid skin weight value in IGetBufferFormatSize()" ); + hsAssert(false, "Invalid skin weight value in IGetBufferFormatSize()"); } - size += sizeof( float ) * 3 * plGBufferGroup::CalcNumUVs(format); + size += sizeof(float) * 3 * plGBufferGroup::CalcNumUVs(format); return size; } -void plMetalDevice::SetupVertexBufferRef(plGBufferGroup *owner, uint32_t idx, plMetalDevice::VertexBufferRef *vRef) +void plMetalDevice::SetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalDevice::VertexBufferRef* vRef) { uint8_t format = owner->GetVertexFormat(); - + if (format & plGBufferGroup::kSkinIndices) { format &= ~(plGBufferGroup::kSkinWeightMask | plGBufferGroup::kSkinIndices); - format |= plGBufferGroup::kSkinNoWeights; // Should do nothing, but just in case... + format |= plGBufferGroup::kSkinNoWeights; // Should do nothing, but just in case... 
vRef->SetSkinned(true); vRef->SetVolatile(true); } - + uint32_t vertSize = vertSize = IGetBufferFormatSize(format); // vertex stride uint32_t numVerts = owner->GetVertBufferCount(idx); @@ -489,23 +481,22 @@ void plMetalDevice::SetupVertexBufferRef(plGBufferGroup *owner, uint32_t idx, pl vRef->SetVolatile(vRef->Volatile() || owner->AreVertsVolatile()); vRef->fIndex = idx; - + const uint32_t vertStart = owner->GetVertBufferStart(idx) * vertSize; const uint32_t size = owner->GetVertBufferEnd(idx) * vertSize - vertStart; - + owner->SetVertexBufferRef(idx, vRef); hsRefCnt_SafeUnRef(vRef); } -void plMetalDevice::CheckStaticVertexBuffer(plMetalDevice::VertexBufferRef *vRef, plGBufferGroup *owner, uint32_t idx) +void plMetalDevice::CheckStaticVertexBuffer(plMetalDevice::VertexBufferRef* vRef, plGBufferGroup* owner, uint32_t idx) { hsAssert(!vRef->Volatile(), "Creating a managed vertex buffer for a volatile buffer ref"); - - if (!vRef->GetBuffer()) - { + + if (!vRef->GetBuffer()) { FillVertexBufferRef(vRef, owner, idx); - + // This is currently a no op, but this would let the buffer know it can // unload the system memory copy, since we have a managed version now. 
owner->PurgeVertBuffer(idx); @@ -517,67 +508,58 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr const uint32_t vertSize = ref->fVertexSize; const uint32_t vertStart = group->GetVertBufferStart(idx) * vertSize; const uint32_t size = group->GetVertBufferEnd(idx) * vertSize - vertStart; - - if(ref->GetBuffer()) { + + if (ref->GetBuffer()) { assert(size <= ref->GetBuffer()->length()); } - - if (!size) - { + + if (!size) { return; } - + MTL::Buffer* metalBuffer = fMetalDevice->newBuffer(size, MTL::StorageModeManaged); ref->SetBuffer(metalBuffer); - uint8_t* buffer = (uint8_t*) ref->GetBuffer()->contents(); + uint8_t* buffer = (uint8_t*)ref->GetBuffer()->contents(); - if (ref->fData) - { + if (ref->fData) { memcpy(buffer, ref->fData + vertStart, size); - } - else - { + } else { hsAssert(0 == vertStart, "Offsets on non-interleaved data not supported"); hsAssert(group->GetVertBufferCount(idx) * vertSize == size, "Trailing dead space on non-interleaved data not supported"); - + uint8_t* ptr = buffer; - const uint32_t vertSmallSize = group->GetVertexLiteStride() - sizeof(hsPoint3) * 2; - uint8_t* srcVPtr = group->GetVertBufferData(idx); + const uint32_t vertSmallSize = group->GetVertexLiteStride() - sizeof(hsPoint3) * 2; + uint8_t* srcVPtr = group->GetVertBufferData(idx); plGBufferColor* const srcCPtr = group->GetColorBufferData(idx); const size_t numCells = group->GetNumCells(idx); - for (size_t i = 0; i < numCells; i++) - { + for (size_t i = 0; i < numCells; i++) { plGBufferCell* cell = group->GetCell(idx, i); - if (cell->fColorStart == uint32_t(-1)) - { + if (cell->fColorStart == uint32_t(-1)) { /// Interleaved, do straight copy memcpy(ptr, srcVPtr + cell->fVtxStart, cell->fLength * vertSize); ptr += cell->fLength * vertSize; assert(size <= cell->fLength * vertSize); - } - else - { + } else { hsStatusMessage("Non interleaved data"); /// Separated, gotta interleave - uint8_t* tempVPtr = srcVPtr + cell->fVtxStart; + uint8_t* tempVPtr = 
srcVPtr + cell->fVtxStart; plGBufferColor* tempCPtr = srcCPtr + cell->fColorStart; - int j; - for( j = 0; j < cell->fLength; j++ ) - { - memcpy( ptr, tempVPtr, sizeof( hsPoint3 ) * 2 ); - ptr += sizeof( hsPoint3 ) * 2; - tempVPtr += sizeof( hsPoint3 ) * 2; - - memcpy( ptr, &tempCPtr->fDiffuse, sizeof( uint32_t ) ); - ptr += sizeof( uint32_t ); - memcpy( ptr, &tempCPtr->fSpecular, sizeof( uint32_t ) ); - ptr += sizeof( uint32_t ); - - memcpy( ptr, tempVPtr, vertSmallSize ); + int j; + for (j = 0; j < cell->fLength; j++) { + memcpy(ptr, tempVPtr, sizeof(hsPoint3) * 2); + ptr += sizeof(hsPoint3) * 2; + tempVPtr += sizeof(hsPoint3) * 2; + + memcpy(ptr, &tempCPtr->fDiffuse, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + memcpy(ptr, &tempCPtr->fSpecular, sizeof(uint32_t)); + ptr += sizeof(uint32_t); + + memcpy(ptr, tempVPtr, vertSmallSize); ptr += vertSmallSize; tempVPtr += vertSmallSize; tempCPtr++; @@ -587,7 +569,7 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr hsAssert((ptr - buffer) == size, "Didn't fill the buffer?"); } - + metalBuffer->release(); /// Unlock and clean up @@ -595,12 +577,12 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr ref->SetDirty(false); } -void plMetalDevice::FillVolatileVertexBufferRef(plMetalDevice::VertexBufferRef *ref, plGBufferGroup *group, uint32_t idx) +void plMetalDevice::FillVolatileVertexBufferRef(plMetalDevice::VertexBufferRef* ref, plGBufferGroup* group, uint32_t idx) { uint8_t* dst = ref->fData; uint8_t* src = group->GetVertBufferData(idx); - size_t uvChanSize = plGBufferGroup::CalcNumUVs(group->GetVertexFormat()) * sizeof(float) * 3; + size_t uvChanSize = plGBufferGroup::CalcNumUVs(group->GetVertexFormat()) * sizeof(float) * 3; uint8_t numWeights = (group->GetVertexFormat() & plGBufferGroup::kSkinWeightMask) >> 4; for (uint32_t i = 0; i < ref->fCount; ++i) { @@ -628,7 +610,7 @@ void plMetalDevice::FillVolatileVertexBufferRef(plMetalDevice::VertexBufferRef * 
} } -void plMetalDevice::SetupIndexBufferRef(plGBufferGroup *owner, uint32_t idx, plMetalDevice::IndexBufferRef *iRef) +void plMetalDevice::SetupIndexBufferRef(plGBufferGroup* owner, uint32_t idx, plMetalDevice::IndexBufferRef* iRef) { uint32_t numIndices = owner->GetIndexBufferCount(idx); iRef->fCount = numIndices; @@ -645,81 +627,80 @@ void plMetalDevice::SetupIndexBufferRef(plGBufferGroup *owner, uint32_t idx, plM iRef->SetVolatile(owner->AreIdxVolatile()); } -void plMetalDevice::CheckIndexBuffer(plMetalDevice::IndexBufferRef *iRef) +void plMetalDevice::CheckIndexBuffer(plMetalDevice::IndexBufferRef* iRef) { - if(!iRef->GetBuffer() && iRef->fCount) { + if (!iRef->GetBuffer() && iRef->fCount) { iRef->SetVolatile(false); - + iRef->SetDirty(true); iRef->SetRebuiltSinceUsed(true); } } -void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef *iRef, plGBufferGroup *owner, uint32_t idx) +void plMetalDevice::FillIndexBufferRef(plMetalDevice::IndexBufferRef* iRef, plGBufferGroup* owner, uint32_t idx) { uint32_t startIdx = owner->GetIndexBufferStart(idx); uint32_t fullSize = owner->GetIndexBufferCount(idx) * sizeof(uint16_t); uint32_t size = (owner->GetIndexBufferEnd(idx) - startIdx) * sizeof(uint16_t); - if (!size) - { + if (!size) { return; } - + iRef->PrepareForWrite(); MTL::Buffer* indexBuffer = iRef->GetBuffer(); - if(!indexBuffer || indexBuffer->length() < fullSize) { + if (!indexBuffer || indexBuffer->length() < fullSize) { indexBuffer = fMetalDevice->newBuffer(fullSize, MTL::ResourceStorageModeManaged); iRef->SetBuffer(indexBuffer); indexBuffer->release(); } - + memcpy(((uint16_t*)indexBuffer->contents()) + startIdx, owner->GetIndexBufferData(idx) + startIdx, size); indexBuffer->didModifyRange(NS::Range(startIdx, size)); iRef->SetDirty(false); } -void plMetalDevice::SetupTextureRef(plBitmap *img, plMetalDevice::TextureRef *tRef) +void plMetalDevice::SetupTextureRef(plBitmap* img, plMetalDevice::TextureRef* tRef) { tRef->fOwner = img; - + plBitmap* 
imageToCheck = img; - - //if it's a cubic texture, check the first face. The root img will give a false format that will cause us to decode wrong. + + // if it's a cubic texture, check the first face. The root img will give a false format that will cause us to decode wrong. plCubicEnvironmap* cubicImg = dynamic_cast(img); - if(cubicImg) { + if (cubicImg) { imageToCheck = cubicImg->GetFace(0); } if (imageToCheck->IsCompressed()) { switch (imageToCheck->fDirectXInfo.fCompressionType) { - case plBitmap::DirectXInfo::kDXT1: + case plBitmap::DirectXInfo::kDXT1: tRef->fFormat = MTL::PixelFormatBC1_RGBA; - break; - case plBitmap::DirectXInfo::kDXT5: + break; + case plBitmap::DirectXInfo::kDXT5: tRef->fFormat = MTL::PixelFormatBC3_RGBA; - break; + break; } } else { switch (imageToCheck->fUncompressedInfo.fType) { - case plBitmap::UncompressedInfo::kRGB8888: - tRef->fFormat = MTL::PixelFormatBGRA8Unorm; - break; - case plBitmap::UncompressedInfo::kRGB4444: - //we'll convert this on load to 8 bits per channel - //Metal doesn't support 4 bits per channel on all hardware - tRef->fFormat = MTL::PixelFormatBGRA8Unorm; - break; - case plBitmap::UncompressedInfo::kRGB1555: - tRef->fFormat = MTL::PixelFormatBGR5A1Unorm; - break; - case plBitmap::UncompressedInfo::kInten8: - tRef->fFormat = MTL::PixelFormatR8Uint; - break; - case plBitmap::UncompressedInfo::kAInten88: - tRef->fFormat = MTL::PixelFormatRG8Uint; - break; + case plBitmap::UncompressedInfo::kRGB8888: + tRef->fFormat = MTL::PixelFormatBGRA8Unorm; + break; + case plBitmap::UncompressedInfo::kRGB4444: + // we'll convert this on load to 8 bits per channel + // Metal doesn't support 4 bits per channel on all hardware + tRef->fFormat = MTL::PixelFormatBGRA8Unorm; + break; + case plBitmap::UncompressedInfo::kRGB1555: + tRef->fFormat = MTL::PixelFormatBGR5A1Unorm; + break; + case plBitmap::UncompressedInfo::kInten8: + tRef->fFormat = MTL::PixelFormatR8Uint; + break; + case plBitmap::UncompressedInfo::kAInten88: + tRef->fFormat 
= MTL::PixelFormatRG8Uint; + break; } } @@ -734,7 +715,7 @@ void plMetalDevice::ReleaseFramebufferObjects() if (fCurrentUnprocessedOutputTexture) fCurrentUnprocessedOutputTexture->release(); fCurrentFragmentOutputTexture = nil; - + if (fGammaAdjustState) fGammaAdjustState->release(); fGammaAdjustState = nil; @@ -748,22 +729,21 @@ void plMetalDevice::SetFramebufferFormat(MTL::PixelFormat format) } } -void plMetalDevice::CheckTexture(plMetalDevice::TextureRef *tRef) +void plMetalDevice::CheckTexture(plMetalDevice::TextureRef* tRef) { - if (!tRef->fTexture) - { + if (!tRef->fTexture) { tRef->SetDirty(true); } } -uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap) +uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef* tRef, plMipmap* mipmap) { if (mipmap->IsCompressed()) { mipmap->SetCurrLevel(tRef->fLevels); while ((mipmap->GetCurrWidth() | mipmap->GetCurrHeight()) & 0x03) { tRef->fLevels--; - hsAssert(tRef->fLevels >= 0, "How was this ever compressed?" ); - if(tRef->fLevels < 0) { + hsAssert(tRef->fLevels >= 0, "How was this ever compressed?"); + if (tRef->fLevels < 0) { tRef->fLevels = -1; break; } @@ -772,7 +752,7 @@ uint plMetalDevice::ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMi } } -void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice) +void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef* tRef, plMipmap* img, uint slice) { if (img->IsCompressed()) { /* @@ -786,17 +766,17 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *i fixed to be consistant. 
*/ #define HACK_LEVEL_SIZE 1 - + #if HACK_LEVEL_SIZE NS::UInteger width = tRef->fTexture->width(); NS::UInteger height = tRef->fTexture->height(); #endif - + if (tRef->fLevels == -1) { hsAssert(1, "Bad texture found"); return; } - + for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { img->SetCurrLevel(lvl); #if HACK_LEVEL_SIZE @@ -806,42 +786,42 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *i NS::UInteger levelWidth = img->GetCurrWidth(); NS::UInteger levelHeight = img->GetCurrHeight(); #endif - + switch (img->fDirectXInfo.fCompressionType) { case plBitmap::DirectXInfo::kDXT1: - tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, levelWidth, levelHeight), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), levelWidth * 2, 0); + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, levelWidth, levelHeight), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), levelWidth * 2, 0); break; case plBitmap::DirectXInfo::kDXT5: - tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); break; - } + } } } else { for (int lvl = 0; lvl <= tRef->fLevels; lvl++) { img->SetCurrLevel(lvl); - - if(img->GetCurrLevelPtr()) { - if(img->fUncompressedInfo.fType == plBitmap::UncompressedInfo::kRGB4444) { - - struct RGBA4444Component { - unsigned r:4; - unsigned g:4; - unsigned b:4; - unsigned a:4; + + if (img->GetCurrLevelPtr()) { + if (img->fUncompressedInfo.fType == plBitmap::UncompressedInfo::kRGB4444) { + struct RGBA4444Component + { + unsigned r : 4; + unsigned g : 4; + unsigned b : 4; + unsigned a : 4; }; - - RGBA4444Component *in = (RGBA4444Component *)img->GetCurrLevelPtr(); - simd_uint4 *out = (simd_uint4 *) malloc(img->GetCurrHeight() * 
img->GetCurrWidth() * 4); - - for(int i=0; i<(img->GetCurrWidth() * img->GetCurrHeight()); i++) { + + RGBA4444Component* in = (RGBA4444Component*)img->GetCurrLevelPtr(); + simd_uint4* out = (simd_uint4*)malloc(img->GetCurrHeight() * img->GetCurrWidth() * 4); + + for (int i = 0; i < (img->GetCurrWidth() * img->GetCurrHeight()); i++) { out[i].r = in[i].r; out[i].g = in[i].g; out[i].b = in[i].b; out[i].a = in[i].a; } - + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, out, img->GetCurrWidth() * 4, 0); - + free(out); } else { tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); @@ -860,50 +840,48 @@ void plMetalDevice::MakeTextureRef(plMetalDevice::TextureRef* tRef, plMipmap* im if (!img->GetImage()) { return; } - - if(tRef->fTexture) { + + if (tRef->fTexture) { tRef->fTexture->release(); } - + tRef->fLevels = img->GetNumLevels() - 1; - //FIXME: Is this texture check actually needed - //if(!tRef->fTexture) { - ConfigureAllowedLevels(tRef, img); - - bool textureIsValid = tRef->fLevels > 0; - - //texture doesn't exist yet, create it - bool supportsMipMap = tRef->fLevels && textureIsValid; - MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), img->GetHeight(), supportsMipMap); - descriptor->setUsage(MTL::TextureUsageShaderRead); - - //Metal gets mad if we set this with 0, only set it if we know there are mipmaps - if(supportsMipMap) { + // FIXME: Is this texture check actually needed + // if(!tRef->fTexture) { + ConfigureAllowedLevels(tRef, img); + + bool textureIsValid = tRef->fLevels > 0; + + // texture doesn't exist yet, create it + bool supportsMipMap = tRef->fLevels && textureIsValid; + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor(tRef->fFormat, img->GetWidth(), 
img->GetHeight(), supportsMipMap); + descriptor->setUsage(MTL::TextureUsageShaderRead); + + // Metal gets mad if we set this with 0, only set it if we know there are mipmaps + if (supportsMipMap) { descriptor->setMipmapLevelCount(tRef->fLevels + 1); } descriptor->setStorageMode(MTL::StorageModeManaged); - - + tRef->fTexture = fMetalDevice->newTexture(descriptor); - PopulateTexture( tRef, img, 0); + PopulateTexture(tRef, img, 0); //} - - + tRef->SetDirty(false); } -void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plCubicEnvironmap *img) +void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef* tRef, plCubicEnvironmap* img) { - MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::textureCubeDescriptor(tRef->fFormat, img->GetFace(0)->GetWidth(), tRef->fLevels != 0); - + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::textureCubeDescriptor(tRef->fFormat, img->GetFace(0)->GetWidth(), tRef->fLevels != 0); + if (tRef->fLevels != 0) { descriptor->setMipmapLevelCount(tRef->fLevels + 1); } descriptor->setUsage(MTL::TextureUsageShaderRead); - + tRef->fTexture = fMetalDevice->newTexture(descriptor); - + static const uint kFaceMapping[] = { 1, // kLeftFace 0, // kRightFace @@ -913,9 +891,9 @@ void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef *tRef, plCubic 3 // kBottomFace }; for (size_t i = 0; i < 6; i++) { - PopulateTexture( tRef, img->GetFace(i), kFaceMapping[i]); + PopulateTexture(tRef, img->GetFace(i), kFaceMapping[i]); } - + tRef->SetDirty(false); } @@ -928,7 +906,7 @@ void plMetalDevice::SetWorldToCameraMatrix(const hsMatrix44& src) { hsMatrix44 inv; src.GetInverse(&inv); - + hsMatrix2SIMD(src, &fMatrixW2C); hsMatrix2SIMD(inv, &fMatrixC2W); } @@ -937,7 +915,7 @@ void plMetalDevice::SetLocalToWorldMatrix(const hsMatrix44& src) { hsMatrix44 inv; src.GetInverse(&inv); - + hsMatrix2SIMD(src, &fMatrixL2W); hsMatrix2SIMD(inv, &fMatrixW2L); } @@ -946,50 +924,50 @@ void 
plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) { fCurrentCommandBuffer = fCommandQueue->commandBuffer(); fCurrentCommandBuffer->retain(); - + SetFramebufferFormat(drawable->texture()->pixelFormat()); - + bool depthNeedsRebuild = fCurrentDrawableDepthTexture == nullptr; depthNeedsRebuild |= drawable->texture()->width() != fCurrentDrawableDepthTexture->width() || drawable->texture()->height() != fCurrentDrawableDepthTexture->height(); - - //cache the depth buffer, we'll just clear it every time. - if(depthNeedsRebuild) { - if(fCurrentDrawableDepthTexture) { + + // cache the depth buffer, we'll just clear it every time. + if (depthNeedsRebuild) { + if (fCurrentDrawableDepthTexture) { fCurrentDrawableDepthTexture->release(); fCurrentFragmentMSAAOutputTexture->release(); } - - MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, - drawable->texture()->width(), - drawable->texture()->height(), - false); + + MTL::TextureDescriptor* depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + drawable->texture()->width(), + drawable->texture()->height(), + false); if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); - } else { + } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); - + if (fSampleCount != 1) { - //MSSA depth and color output + // MSSA depth and color output depthTextureDescriptor->setSampleCount(fSampleCount); depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); depthTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); - } else { + } else { 
depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); - - MTL::TextureDescriptor *msaaColorTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), + + MTL::TextureDescriptor* msaaColorTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); msaaColorTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); if (fMetalDevice->supportsFamily(MTL::GPUFamilyApple1) && fSampleCount == 1) { msaaColorTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); - } else { + } else { msaaColorTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } msaaColorTextureDescriptor->setTextureType(MTL::TextureType2DMultisample); @@ -999,9 +977,9 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) fCurrentDrawableDepthTexture = fMetalDevice->newTexture(depthTextureDescriptor); } } - - //Do we need to create a unprocessed output texture? - //If the depth needs to be rebuilt - we probably need to rebuild this one too + + // Do we need to create a unprocessed output texture? 
+ // If the depth needs to be rebuilt - we probably need to rebuild this one too if ((fCurrentUnprocessedOutputTexture && depthNeedsRebuild) || (fCurrentUnprocessedOutputTexture == nullptr && NeedsPostprocessing())) { MTL::TextureDescriptor* mainPassDescriptor = MTL::TextureDescriptor::texture2DDescriptor(drawable->texture()->pixelFormat(), drawable->texture()->width(), drawable->texture()->height(), false); mainPassDescriptor->setStorageMode(MTL::StorageModePrivate); @@ -1009,111 +987,110 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) fCurrentUnprocessedOutputTexture->release(); fCurrentUnprocessedOutputTexture = fMetalDevice->newTexture(mainPassDescriptor); } - + fCurrentDrawable = drawable->retain(); } -void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable **condOut) { - - __block std::condition_variable *newCondition = new std::condition_variable(); +void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable** condOut) +{ + __block std::condition_variable* newCondition = new std::condition_variable(); fConditionMap[record] = newCondition; - if(condOut) { + if (condOut) { *condOut = newCondition; } - + if (fNewPipelineStateMap[record] != NULL) { return fNewPipelineStateMap[record]; } - - MTL::Library *library = fMetalDevice->newDefaultLibrary(); - + + MTL::Library* library = fMetalDevice->newDefaultLibrary(); + std::shared_ptr pipelineState = record.state; - + MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init(); descriptor->setLabel(pipelineState->GetDescription()); - + const MTL::Function* vertexFunction = pipelineState->GetVertexFunction(library); const MTL::Function* fragmentFunction = pipelineState->GetFragmentFunction(library); descriptor->setVertexFunction(vertexFunction); descriptor->setFragmentFunction(fragmentFunction); - + descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); 
pipelineState->ConfigureBlend(descriptor->colorAttachments()->object(0)); - - MTL::VertexDescriptor *vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); + + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); pipelineState->ConfigureVertexDescriptor(vertexDescriptor); descriptor->setVertexDescriptor(vertexDescriptor); descriptor->setDepthAttachmentPixelFormat(record.depthFormat); descriptor->colorAttachments()->object(0)->setPixelFormat(record.colorFormat); - + descriptor->setSampleCount(record.sampleCount); - + NS::Error* error; - fMetalDevice->newRenderPipelineState(descriptor, ^(MTL::RenderPipelineState *pipelineState, NS::Error *error){ + fMetalDevice->newRenderPipelineState(descriptor, ^(MTL::RenderPipelineState* pipelineState, NS::Error* error) { if (error) { - //leave the condition in place for now, we don't want to - //retry if the shader is defective. the condition will - //prevent retries + // leave the condition in place for now, we don't want to + // retry if the shader is defective. 
the condition will + // prevent retries hsAssert(0, error->localizedDescription()->cString(NS::UTF8StringEncoding)); } else { - plMetalLinkedPipeline *linkedPipeline = new plMetalLinkedPipeline(); + plMetalLinkedPipeline* linkedPipeline = new plMetalLinkedPipeline(); linkedPipeline->pipelineState = pipelineState->retain(); linkedPipeline->fragFunction = fragmentFunction; linkedPipeline->vertexFunction = vertexFunction; - + fNewPipelineStateMap[record] = linkedPipeline; - //signal that we're done + // signal that we're done newCondition->notify_all(); } }); - + descriptor->release(); library->release(); } -plMetalDevice::plMetalLinkedPipeline* plMetalDevice::PipelineState(plMetalPipelineState* pipelineState) { - +plMetalDevice::plMetalLinkedPipeline* plMetalDevice::PipelineState(plMetalPipelineState* pipelineState) +{ MTL::PixelFormat depthFormat = fCurrentDepthFormat; MTL::PixelFormat colorFormat = fCurrentFragmentOutputTexture->pixelFormat(); - + plMetalPipelineRecord record = { depthFormat, colorFormat, - CurrentTargetSampleCount() - }; - + CurrentTargetSampleCount()}; + record.state = std::shared_ptr(pipelineState->Clone()); - + plMetalLinkedPipeline* renderState = fNewPipelineStateMap[record]; - - //if it exists, return it, we're done - if(renderState) { + + // if it exists, return it, we're done + if (renderState) { return renderState; } - - //check and see if we're already building it. If so, wait. - //Note: even if it already exists, this lock will be kept, and it will - //let us through. This is to prevent race conditions where the render state - //was null, but maybe in the time it took us to get here the state compiled. - std::condition_variable *alreadyBuildingCondition = fConditionMap[record]; - if(alreadyBuildingCondition) { + + // check and see if we're already building it. If so, wait. + // Note: even if it already exists, this lock will be kept, and it will + // let us through. 
This is to prevent race conditions where the render state + // was null, but maybe in the time it took us to get here the state compiled. + std::condition_variable* alreadyBuildingCondition = fConditionMap[record]; + if (alreadyBuildingCondition) { std::unique_lock lock(fPipelineCreationMtx); alreadyBuildingCondition->wait(lock); - - //should be returning the render state here, if not it failed to build - //we'll allow the null return + + // should be returning the render state here, if not it failed to build + // we'll allow the null return return fNewPipelineStateMap[record]; } - - //it doesn't exist, start a build and wait - //only render thread is allowed to start builds, - //shouldn't be race conditions here + + // it doesn't exist, start a build and wait + // only render thread is allowed to start builds, + // shouldn't be race conditions here StartPipelineBuild(record, &alreadyBuildingCondition); std::unique_lock lock(fPipelineCreationMtx); alreadyBuildingCondition->wait(lock); - - //should be returning the render state here, if not it failed to build - //we'll allow the null return + + // should be returning the render state here, if not it failed to build + // we'll allow the null return return fNewPipelineStateMap[record]; } @@ -1121,34 +1098,34 @@ std::condition_variable* plMetalDevice::PrewarmPipelineStateFor(plMetalPipelineS { MTL::PixelFormat depthFormat = fCurrentDepthFormat; MTL::PixelFormat colorFormat = fCurrentFragmentOutputTexture->pixelFormat(); - + plMetalPipelineRecord record = { depthFormat, colorFormat, - CurrentTargetSampleCount() - }; - + CurrentTargetSampleCount()}; + record.state = std::shared_ptr(pipelineState->Clone()); - //only render thread is allowed to prewarm, no race conditions around - //fConditionMap creation - if(!fNewPipelineStateMap[record] && fConditionMap[record]) { - std::condition_variable *condOut; + // only render thread is allowed to prewarm, no race conditions around + // fConditionMap creation + if 
(!fNewPipelineStateMap[record] && fConditionMap[record]) { + std::condition_variable* condOut; StartPipelineBuild(record, &condOut); return condOut; } return nullptr; } -bool plMetalDevice::plMetalPipelineRecord::operator==(const plMetalPipelineRecord &p) const { +bool plMetalDevice::plMetalPipelineRecord::operator==(const plMetalPipelineRecord& p) const +{ return depthFormat == p.depthFormat && - colorFormat == p.colorFormat && - sampleCount == p.sampleCount && - state->operator==(*p.state); + colorFormat == p.colorFormat && + sampleCount == p.sampleCount && + state->operator==(*p.state); } MTL::CommandBuffer* plMetalDevice::GetCurrentCommandBuffer() { - if(fCurrentOffscreenCommandBuffer) { + if (fCurrentOffscreenCommandBuffer) { return fCurrentOffscreenCommandBuffer; } return fCurrentCommandBuffer; @@ -1159,30 +1136,30 @@ void plMetalDevice::SubmitCommandBuffer() if (fBlitCommandEncoder) { fBlitCommandEncoder->endEncoding(); fBlitCommandBuffer->commit(); - + fBlitCommandBuffer->release(); fBlitCommandEncoder->release(); - + fBlitCommandBuffer = nullptr; fBlitCommandEncoder = nullptr; } - + fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); fCurrentRenderTargetCommandEncoder = nil; - - if( NeedsPostprocessing() ) { + + if (NeedsPostprocessing()) { PostprocessIntoDrawable(); } - + fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->commit(); fCurrentCommandBuffer->release(); fCurrentCommandBuffer = nil; - + fCurrentDrawable->release(); fCurrentDrawable = nil; - + // Reset the clear colors for the next pass // Metal clears on framebuffer load - so don't cause a clear // command in this pass to affect the next pass. 
@@ -1199,41 +1176,42 @@ MTL::SamplerState* plMetalDevice::SampleStateForClampFlags(hsGMatState::hsGMatCl return fSamplerStates[sampleState]; } -void plMetalDevice::CreateGammaAdjustState() { - MTL::RenderPipelineDescriptor *gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); - MTL::Library* library = fMetalDevice->newDefaultLibrary(); - +void plMetalDevice::CreateGammaAdjustState() +{ + MTL::RenderPipelineDescriptor* gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); + MTL::Library* library = fMetalDevice->newDefaultLibrary(); + gammaDescriptor->setVertexFunction(library->newFunction(NS::MakeConstantString("gammaCorrectVertex"))->autorelease()); gammaDescriptor->setFragmentFunction(library->newFunction(NS::MakeConstantString("gammaCorrectFragment"))->autorelease()); - + library->release(); - + gammaDescriptor->colorAttachments()->object(0)->setPixelFormat(fFramebufferFormat); - - NS::Error *error; + + NS::Error* error; fGammaAdjustState->release(); fGammaAdjustState = fMetalDevice->newRenderPipelineState(gammaDescriptor, &error); gammaDescriptor->release(); } -void plMetalDevice::PostprocessIntoDrawable() { - +void plMetalDevice::PostprocessIntoDrawable() +{ if (!fGammaAdjustState) { CreateGammaAdjustState(); } - - //Gamma adjust + + // Gamma adjust MTL::RenderPassDescriptor* gammaPassDescriptor = MTL::RenderPassDescriptor::renderPassDescriptor(); gammaPassDescriptor->colorAttachments()->object(0)->setLoadAction(MTL::LoadActionDontCare); gammaPassDescriptor->colorAttachments()->object(0)->setTexture(fCurrentDrawable->texture()); gammaPassDescriptor->colorAttachments()->object(0)->setStoreAction(MTL::StoreActionStore); - + MTL::RenderCommandEncoder* gammaAdjustEncoder = fCurrentCommandBuffer->renderCommandEncoder(gammaPassDescriptor); - + gammaAdjustEncoder->setRenderPipelineState(fGammaAdjustState); - + static const float fullFrameCoords[16] = { - //first pair is vertex, second pair is texture + // first pair is vertex, second pair is 
texture -1, -1, 0, 1, 1, -1, 1, 1, -1, 1, 0, 0, @@ -1257,15 +1235,15 @@ std::size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMeta MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() { - //return the current render command encoder - //if a framebuffer wasn't set, assume screen, emulating GL - if(fCurrentRenderTargetCommandEncoder) { + // return the current render command encoder + // if a framebuffer wasn't set, assume screen, emulating GL + if (fCurrentRenderTargetCommandEncoder) { return fCurrentRenderTargetCommandEncoder; } - + if (!fCurrentRenderTargetCommandEncoder) { BeginNewRenderPass(); - + if (fCurrentRenderTarget) { fClearRenderTargetColor = simd_make_float4(0.0f, 0.0f, 0.0f, 1.0f); fShouldClearRenderTarget = false; @@ -1276,7 +1254,7 @@ MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() fClearDrawableDepth = 1.0; } } - + return fCurrentRenderTargetCommandEncoder; } @@ -1287,14 +1265,14 @@ CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() void plMetalDevice::BlitTexture(MTL::Texture* src, MTL::Texture* dst) { - //FIXME: BlitTexture current unused - this used to create private GPU only textures through a copy from a CPU texture. + // FIXME: BlitTexture current unused - this used to create private GPU only textures through a copy from a CPU texture. 
if (fBlitCommandEncoder == nullptr) { fBlitCommandBuffer = fCommandQueue->commandBuffer()->retain(); - //enqueue so we go to the front of the line before render + // enqueue so we go to the front of the line before render fBlitCommandBuffer->enqueue(); fBlitCommandEncoder = fBlitCommandBuffer->blitCommandEncoder()->retain(); } - + fBlitCommandEncoder->copyFromTexture(src, 0, 0, MTL::Origin(0, 0, 0), MTL::Size(src->width(), src->height(), 0), dst, 0, 0, MTL::Origin(0, 0, 0)); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 089ac3bc61..8abaeef984 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -42,20 +42,17 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #ifndef _plMetalDevice_h_ #define _plMetalDevice_h_ -#include "HeadSpin.h" - -#include "hsGMatState.h" - -#include "plMetalDeviceRef.h" -#include "hsMatrix44.h" +#include #include #include -#include - -#include #include +#include +#include "HeadSpin.h" +#include "hsGMatState.h" +#include "hsMatrix44.h" +#include "plMetalDeviceRef.h" #include "plSurface/plShader.h" #include "plSurface/plShaderTable.h" @@ -67,50 +64,49 @@ class plCubicEnvironmap; class plLayerInterface; class plMetalPipelineState; -//NOTE: Results of this will be row major +// NOTE: Results of this will be row major matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst); class plMetalDevice { - friend plMetalPipeline; friend class plMetalMaterialShaderRef; friend class plMetalPlateManager; friend class plMetalPipelineState; - + public: typedef plMetalVertexBufferRef VertexBufferRef; typedef plMetalIndexBufferRef IndexBufferRef; typedef plMetalTextureRef TextureRef; - + public: - plMetalPipeline* fPipeline; - - hsWindowHndl fDevice; - hsWindowHndl fWindow; - - const char* fErrorMsg; - - MTL::RenderCommandEncoder* 
CurrentRenderCommandEncoder(); - MTL::Device* fMetalDevice; - MTL::CommandQueue* fCommandQueue; - MTL::Buffer* fCurrentIndexBuffer; - - size_t fActiveThread; - matrix_float4x4 fMatrixProj; - matrix_float4x4 fMatrixL2W; - matrix_float4x4 fMatrixW2L; - matrix_float4x4 fMatrixW2C; - matrix_float4x4 fMatrixC2W; - + plMetalPipeline* fPipeline; + + hsWindowHndl fDevice; + hsWindowHndl fWindow; + + const char* fErrorMsg; + + MTL::RenderCommandEncoder* CurrentRenderCommandEncoder(); + MTL::Device* fMetalDevice; + MTL::CommandQueue* fCommandQueue; + MTL::Buffer* fCurrentIndexBuffer; + + size_t fActiveThread; + matrix_float4x4 fMatrixProj; + matrix_float4x4 fMatrixL2W; + matrix_float4x4 fMatrixW2L; + matrix_float4x4 fMatrixW2C; + matrix_float4x4 fMatrixC2W; + public: - - struct plMetalLinkedPipeline { - const MTL::RenderPipelineState *pipelineState; - const MTL::Function *fragFunction; - const MTL::Function *vertexFunction; + struct plMetalLinkedPipeline + { + const MTL::RenderPipelineState* pipelineState; + const MTL::Function* fragFunction; + const MTL::Function* vertexFunction; }; - + plMetalDevice(); bool InitDevice(); @@ -128,9 +124,8 @@ class plMetalDevice /** Translate our viewport into a GL viewport. 
*/ void SetViewport(); - bool BeginRender(); - + /* Device Ref Functions **************************************************/ void SetupVertexBufferRef(plGBufferGroup* owner, uint32_t idx, VertexBufferRef* vRef); void CheckStaticVertexBuffer(VertexBufferRef* vRef, plGBufferGroup* owner, uint32_t idx); @@ -144,126 +139,126 @@ class plMetalDevice void CheckTexture(TextureRef* tRef); void MakeTextureRef(TextureRef* tRef, plMipmap* img); void MakeCubicTextureRef(TextureRef* tRef, plCubicEnvironmap* img); - - + const char* GetErrorString() const { return fErrorMsg; } - + void SetProjectionMatrix(const hsMatrix44& src); void SetWorldToCameraMatrix(const hsMatrix44& src); void SetLocalToWorldMatrix(const hsMatrix44& src); - - void PopulateTexture(plMetalDevice::TextureRef *tRef, plMipmap *img, uint slice); - uint ConfigureAllowedLevels(plMetalDevice::TextureRef *tRef, plMipmap *mipmap); - - //stencil states are expensive to make, they should be cached - //FIXME: There should be a function to pair these with hsGMatState - MTL::DepthStencilState *fNoZReadStencilState; - MTL::DepthStencilState *fNoZWriteStencilState; - MTL::DepthStencilState *fNoZReadOrWriteStencilState; - MTL::DepthStencilState *fReverseZStencilState; - MTL::DepthStencilState *fDefaultStencilState; - uint8_t fSampleCount; - - ///Create a new command buffer to encode all the operations needed to draw a frame - //Currently requires a CA drawable and not a Metal drawable. In since CA drawable is only abstract implementation I know about, not sure where we would find others? 
- void CreateNewCommandBuffer(CA::MetalDrawable* drawable); + + void PopulateTexture(plMetalDevice::TextureRef* tRef, plMipmap* img, uint slice); + uint ConfigureAllowedLevels(plMetalDevice::TextureRef* tRef, plMipmap* mipmap); + + // stencil states are expensive to make, they should be cached + // FIXME: There should be a function to pair these with hsGMatState + MTL::DepthStencilState* fNoZReadStencilState; + MTL::DepthStencilState* fNoZWriteStencilState; + MTL::DepthStencilState* fNoZReadOrWriteStencilState; + MTL::DepthStencilState* fReverseZStencilState; + MTL::DepthStencilState* fDefaultStencilState; + uint8_t fSampleCount; + + /// Create a new command buffer to encode all the operations needed to draw a frame + // Currently requires a CA drawable and not a Metal drawable. In since CA drawable is only abstract implementation I know about, not sure where we would find others? + void CreateNewCommandBuffer(CA::MetalDrawable* drawable); MTL::CommandBuffer* GetCurrentCommandBuffer(); - CA::MetalDrawable* GetCurrentDrawable(); - ///Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. - void SubmitCommandBuffer(); - void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); - + CA::MetalDrawable* GetCurrentDrawable(); + /// Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. 
+ void SubmitCommandBuffer(); + void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); + void SetMaxAnsiotropy(uint8_t maxAnsiotropy); void SetMSAASampleCount(uint8_t sampleCount); - + MTL::SamplerState* SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState); - - NS::UInteger CurrentTargetSampleCount() { + + NS::UInteger CurrentTargetSampleCount() + { if (fCurrentRenderTarget) { return 1; } else { return fSampleCount; } } - + void BlitTexture(MTL::Texture* src, MTL::Texture* dst); - + void EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma); - + MTL::PixelFormat GetFramebufferFormat() { return fFramebufferFormat; }; - + private: - - struct plMetalPipelineRecord { - MTL::PixelFormat depthFormat; - MTL::PixelFormat colorFormat; - NS::UInteger sampleCount; + struct plMetalPipelineRecord + { + MTL::PixelFormat depthFormat; + MTL::PixelFormat colorFormat; + NS::UInteger sampleCount; std::shared_ptr state; - - bool operator==(const plMetalPipelineRecord &p) const; + + bool operator==(const plMetalPipelineRecord& p) const; }; - - + struct plMetalPipelineRecordHashFunction { std::size_t operator()(plMetalPipelineRecord const& s) const noexcept; }; - - std::unordered_map fNewPipelineStateMap; - //the condition map allows consumers of pipeline states to wait until the pipeline state is ready - std::unordered_map fConditionMap; - std::mutex fPipelineCreationMtx; - void StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable **condOut); - std::condition_variable* PrewarmPipelineStateFor(plMetalPipelineState* pipelineState); - + + std::unordered_map fNewPipelineStateMap; + // the condition map allows consumers of pipeline states to wait until the pipeline state is ready + std::unordered_map fConditionMap; + std::mutex fPipelineCreationMtx; + void StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable** condOut); + std::condition_variable* 
PrewarmPipelineStateFor(plMetalPipelineState* pipelineState); + protected: plMetalLinkedPipeline* PipelineState(plMetalPipelineState* pipelineState); - + MTL::Texture* fGammaLUTTexture; - + void SetFramebufferFormat(MTL::PixelFormat format); - + private: - MTL::PixelFormat fFramebufferFormat; - - //these are internal bits for backing the current render pass - //private because the functions should be used to keep a consistant - //render pass state - MTL::CommandBuffer* fCurrentCommandBuffer; - MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; - MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; - - MTL::Texture* fCurrentDrawableDepthTexture; - MTL::Texture* fCurrentFragmentOutputTexture; - MTL::Texture* fCurrentUnprocessedOutputTexture; - MTL::Texture* fCurrentFragmentMSAAOutputTexture; - - CA::MetalDrawable* fCurrentDrawable; - MTL::PixelFormat fCurrentDepthFormat; - simd_float4 fClearRenderTargetColor; - simd_float4 fClearDrawableColor; - bool fShouldClearRenderTarget; - bool fShouldClearDrawable; - float fClearRenderTargetDepth; - float fClearDrawableDepth; - plRenderTarget* fCurrentRenderTarget; - MTL::SamplerState* fSamplerStates[4]; - - MTL::CommandBuffer* fBlitCommandBuffer; - MTL::BlitCommandEncoder* fBlitCommandEncoder; - - bool NeedsPostprocessing() { + MTL::PixelFormat fFramebufferFormat; + + // these are internal bits for backing the current render pass + // private because the functions should be used to keep a consistant + // render pass state + MTL::CommandBuffer* fCurrentCommandBuffer; + MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; + MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; + + MTL::Texture* fCurrentDrawableDepthTexture; + MTL::Texture* fCurrentFragmentOutputTexture; + MTL::Texture* fCurrentUnprocessedOutputTexture; + MTL::Texture* fCurrentFragmentMSAAOutputTexture; + + CA::MetalDrawable* fCurrentDrawable; + MTL::PixelFormat fCurrentDepthFormat; + simd_float4 fClearRenderTargetColor; + simd_float4 
fClearDrawableColor; + bool fShouldClearRenderTarget; + bool fShouldClearDrawable; + float fClearRenderTargetDepth; + float fClearDrawableDepth; + plRenderTarget* fCurrentRenderTarget; + MTL::SamplerState* fSamplerStates[4]; + + MTL::CommandBuffer* fBlitCommandBuffer; + MTL::BlitCommandEncoder* fBlitCommandEncoder; + + bool NeedsPostprocessing() + { return fGammaLUTTexture != nullptr; } - void PostprocessIntoDrawable(); - void CreateGammaAdjustState(); + void PostprocessIntoDrawable(); + void CreateGammaAdjustState(); MTL::RenderPipelineState* fGammaAdjustState; - + void BeginNewRenderPass(); void ReleaseSamplerStates(); void ReleaseFramebufferObjects(); - - //Blur states + + // Blur states std::unordered_map fBlurShaders; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm index 888c029799..2760a02ce2 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -40,46 +40,61 @@ *==LICENSE==*/ -#include -#include "plMetalDevice.h" #include #include +#include +#include "plMetalDevice.h" + +void plMetalDevice::EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, + float sigma) { + // FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more + // buffers Technically shadow blurring only happens at the end of the render pass though... 
+ CurrentRenderCommandEncoder()->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; + + // look up the shader by sigma value + MPSImageGaussianBlur* blur = (MPSImageGaussianBlur*)fBlurShaders[sigma]; + + // we don't have one, need to create one + if (!blur) { + blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id)fMetalDevice sigma:sigma]; + fBlurShaders[sigma] = (NS::Object*)blur; + } + + // we'd like to do the blur in place, but Metal might not let us. + // if it allocates a new texture, we'll have to glit that data back to the original + id destTexture = (id)texture; + bool result = + [blur encodeToCommandBuffer:(id)commandBuffer + inPlaceTexture:(id*)&destTexture + fallbackCopyAllocator:^id( + MPSKernel* kernel, id commandBuffer, id texture) { + // this copy allocator will release the original texture - that texture is important, + // don't let it + [texture retain]; + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor( + (MTL::PixelFormat)texture.pixelFormat, texture.width, texture.height, false); + descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + return (id)fMetalDevice->newTexture(descriptor)->autorelease(); + }]; -void plMetalDevice::EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma) -{ - //FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more buffers - //Technically shadow blurring only happens at the end of the render pass though... 
- CurrentRenderCommandEncoder()->endEncoding(); - fCurrentRenderTargetCommandEncoder->release(); - fCurrentRenderTargetCommandEncoder = nil; - - //look up the shader by sigma value - MPSImageGaussianBlur *blur = (MPSImageGaussianBlur *)fBlurShaders[sigma]; - - //we don't have one, need to create one - if (!blur) { - blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id)fMetalDevice sigma:sigma]; - fBlurShaders[sigma] = (NS::Object*)blur; - } - - //we'd like to do the blur in place, but Metal might not let us. - //if it allocates a new texture, we'll have to glit that data back to the original - id destTexture = (id)texture; - bool result = [blur encodeToCommandBuffer:(id)commandBuffer inPlaceTexture:(id*)&destTexture fallbackCopyAllocator:^ id (MPSKernel * kernel, id commandBuffer, id texture) { - //this copy allocator will release the original texture - that texture is important, don't let it - [texture retain]; - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor((MTL::PixelFormat)texture.pixelFormat, texture.width, texture.height, false); - descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); - return (id)fMetalDevice->newTexture(descriptor)->autorelease(); - }]; - - //did Metal change our original texture? - if (destTexture != (id)texture) { - //we'll need to blit the dest texture back to the source - //we just committed a compute pass, buffer should be free for us to create - //a blit encoder - id blitEncoder = [(id)GetCurrentCommandBuffer() blitCommandEncoder]; - [blitEncoder copyFromTexture:destTexture sourceSlice:0 sourceLevel:0 sourceOrigin:MTLOriginMake(0, 0, 0) sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 1) toTexture:(id)texture destinationSlice:0 destinationLevel:0 destinationOrigin:MTLOriginMake(0, 0, 0)]; - [blitEncoder endEncoding]; - } + // did Metal change our original texture? 
+ if (destTexture != (id)texture) { + // we'll need to blit the dest texture back to the source + // we just committed a compute pass, buffer should be free for us to create + // a blit encoder + id blitEncoder = + [(id)GetCurrentCommandBuffer() blitCommandEncoder]; + [blitEncoder copyFromTexture:destTexture + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(0, 0, 0) + sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 1) + toTexture:(id)texture + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + [blitEncoder endEncoding]; + } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp index 772b965ee3..373043be70 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp @@ -7,9 +7,8 @@ #include "plMetalDeviceRef.h" - - -void plMetalDeviceRef::Unlink() { +void plMetalDeviceRef::Unlink() +{ hsAssert(fBack, "plGLDeviceRef not in list"); if (fNext) @@ -18,10 +17,10 @@ void plMetalDeviceRef::Unlink() { fBack = nullptr; fNext = nullptr; - } -void plMetalDeviceRef::Link(plMetalDeviceRef **back) { +void plMetalDeviceRef::Link(plMetalDeviceRef **back) +{ hsAssert(fNext == nullptr && fBack == nullptr, "Trying to link a plMetalDeviceRef that's already linked"); fNext = *back; @@ -42,7 +41,6 @@ plMetalVertexBufferRef::~plMetalVertexBufferRef() Release(); } - void plMetalVertexBufferRef::Release() { SetDirty(true); @@ -50,30 +48,27 @@ void plMetalVertexBufferRef::Release() plMetalTextureRef::~plMetalTextureRef() { - //fTexture->release(); + // fTexture->release(); Release(); } - void plMetalTextureRef::Release() { SetDirty(true); } - plMetalIndexBufferRef::~plMetalIndexBufferRef() { Release(); } - void plMetalIndexBufferRef::Release() { SetDirty(true); } - -plMetalRenderTargetRef::~plMetalRenderTargetRef() { 
+plMetalRenderTargetRef::~plMetalRenderTargetRef() +{ Release(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index e6d69ca942..5f4709b2a0 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -42,31 +42,37 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #ifndef _plMetalDeviceRef_inc_ #define _plMetalDeviceRef_inc_ -#include "HeadSpin.h" -#include "hsGDeviceRef.h" #include #include +#include "HeadSpin.h" +#include "hsGDeviceRef.h" + class plGBufferGroup; class plBitmap; class plRenderTarget; - class plMetalDeviceRef : public hsGDeviceRef { protected: plMetalDeviceRef* fNext; plMetalDeviceRef** fBack; - + public: - void Unlink(); - void Link(plMetalDeviceRef **back); - plMetalDeviceRef* GetNext() { return fNext; } - bool IsLinked() { return fBack != nullptr; } - + void Unlink(); + void Link(plMetalDeviceRef** back); + plMetalDeviceRef* GetNext() { return fNext; } + bool IsLinked() { return fBack != nullptr; } + bool HasFlag(uint32_t f) const { return 0 != (fFlags & f); } - void SetFlag(uint32_t f, bool on) { if(on) fFlags |= f; else fFlags &= ~f; } - + void SetFlag(uint32_t f, bool on) + { + if (on) + fFlags |= f; + else + fFlags &= ~f; + } + virtual void Release() = 0; plMetalDeviceRef(); @@ -75,79 +81,81 @@ class plMetalDeviceRef : public hsGDeviceRef /* The buffer pool stores and recycles buffers so that Plasma can encode GPU commands and render in parallel. That means we can't touch buffers the GPU is using, and if a pass or frame rewrites a buffer we have to make sure it's not stomping on something that is already attached to a frame. Because Metal can triple buffer, the first dimension of caching is hard coded to 3. Some ages will also rewrite buffers an unspecified number of times between render passes. 
For example: A reflection render and a main render might have different index buffers. So the second dimension of caching uses an unbounded vector that will hold enough buffers to render in any one age. - + Buffer pools do not allocate buffers, they only store them. The outside caller is responsible for allocating a buffer and then setting it. The buffer pool will retain any buffers within the pool, and automatically release them when they are overwritten or the pool is deallocated. - + Because buffers are only stored on write, and no allocations happen within the pool, overhead is kept low for static buffers. Completely static buffers will never expand the pool if they only write once. */ -class plMetalBufferPoolRef : public plMetalDeviceRef { +class plMetalBufferPoolRef : public plMetalDeviceRef +{ public: - uint32_t fCurrentFrame; - uint32_t fCurrentPass; - uint32_t fLastWriteFrameTime; - - plMetalBufferPoolRef() : - plMetalDeviceRef(), - fLastWriteFrameTime(0), - fCurrentPass(0), - fCurrentFrame(0), - fBuffer(nullptr) + uint32_t fCurrentFrame; + uint32_t fCurrentPass; + uint32_t fLastWriteFrameTime; + + plMetalBufferPoolRef() : plMetalDeviceRef(), + fLastWriteFrameTime(0), + fCurrentPass(0), + fCurrentFrame(0), + fBuffer(nullptr) { } - - //Prepare for write must be called anytime a new pass is going to write a buffer. It moves internal record keeping to reflect that either a new frame or new pass is about to write to the pool. - void PrepareForWrite() { - //if we've moved frames since the last time a write happened, reset our current pass index to 0, otherwise increment the current pass - if(fLastWriteFrameTime != fFrameTime) { + + // Prepare for write must be called anytime a new pass is going to write a buffer. It moves internal record keeping to reflect that either a new frame or new pass is about to write to the pool. 
+ void PrepareForWrite() + { + // if we've moved frames since the last time a write happened, reset our current pass index to 0, otherwise increment the current pass + if (fLastWriteFrameTime != fFrameTime) { fCurrentPass = 0; fLastWriteFrameTime = fFrameTime; fCurrentFrame = (++fCurrentFrame % 3); } else { fCurrentPass++; } - - //update the current buffer focused, if the is no buffer to focus set it to null + + // update the current buffer focused, if the is no buffer to focus set it to null uint32_t currentSize = uint32_t(fBuffers[fCurrentFrame].size()); - if(fCurrentPass < currentSize) { + if (fCurrentPass < currentSize) { fBuffer = fBuffers[fCurrentFrame][fCurrentPass]; } else { fBuffer = nullptr; } } - + static void SetFrameTime(uint32_t frameTime) { fFrameTime = frameTime; }; - + MTL::Buffer* GetBuffer() { return fBuffer; }; - - void SetBuffer(MTL::Buffer* buffer) { + + void SetBuffer(MTL::Buffer* buffer) + { fBuffer = buffer->retain(); uint32_t currentSize = uint32_t(fBuffers[fCurrentFrame].size()); - //if the current vector doesn't have enough room for the entry, resize it - if(fCurrentPass >= currentSize) { + // if the current vector doesn't have enough room for the entry, resize it + if (fCurrentPass >= currentSize) { fBuffers[fCurrentFrame].resize(++currentSize); - } else if(fBuffers[fCurrentFrame][fCurrentPass]) { - //if we're replacing an existing entry, release the old one + } else if (fBuffers[fCurrentFrame][fCurrentPass]) { + // if we're replacing an existing entry, release the old one fBuffers[fCurrentFrame][fCurrentPass]->release(); } fBuffers[fCurrentFrame][fCurrentPass] = fBuffer; } - - void Release() { - for(int i=0; i<3; i++) { + + void Release() + { + for (int i = 0; i < 3; i++) { for (auto buffer : fBuffers[i]) { buffer->release(); } } fBuffer = nullptr; } - + private: - static uint32_t fFrameTime; - MTL::Buffer* fBuffer; + static uint32_t fFrameTime; + MTL::Buffer* fBuffer; std::vector fBuffers[3]; }; - class plMetalVertexBufferRef : public 
plMetalBufferPoolRef { public: @@ -158,13 +166,14 @@ class plMetalVertexBufferRef : public plMetalBufferPoolRef int32_t fOffset; uint8_t fFormat; uint8_t* fData; - - uint32_t fRefTime; - - enum { - kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef - kVolatile = 0x20, - kSkinned = 0x40 + + uint32_t fRefTime; + + enum + { + kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef + kVolatile = 0x20, + kSkinned = 0x40 }; bool RebuiltSinceUsed() const { return HasFlag(kRebuiltSinceUsed); } @@ -178,30 +187,27 @@ class plMetalVertexBufferRef : public plMetalBufferPoolRef bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); } void SetRefTime(uint32_t t) { fRefTime = t; } - - plMetalVertexBufferRef() : - plMetalBufferPoolRef(), - fCount(0), - fIndex(0), - fVertexSize(0), - fOffset(0), - fOwner(nullptr), - fData(nullptr), - fFormat(0), - fRefTime(0) + + plMetalVertexBufferRef() : plMetalBufferPoolRef(), + fCount(0), + fIndex(0), + fVertexSize(0), + fOffset(0), + fOwner(nullptr), + fData(nullptr), + fFormat(0), + fRefTime(0) { } - + virtual ~plMetalVertexBufferRef(); - - - void Link(plMetalVertexBufferRef** back ) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalVertexBufferRef* GetNext() { return (plMetalVertexBufferRef*)fNext; } - + + void Link(plMetalVertexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalVertexBufferRef* GetNext() { return (plMetalVertexBufferRef*)fNext; } + void Release(); }; - class plMetalIndexBufferRef : public plMetalBufferPoolRef { public: @@ -210,12 +216,13 @@ class plMetalIndexBufferRef : public plMetalBufferPoolRef plGBufferGroup* fOwner; uint32_t fRefTime; uint32_t fLastWriteFrameTime; - - enum { - kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef - kVolatile = 0x20 + + enum + { + kRebuiltSinceUsed = 0x10, // kDirty = 0x1 is in hsGDeviceRef + kVolatile = 0x20 }; - + bool RebuiltSinceUsed() const { return HasFlag(kRebuiltSinceUsed); } void 
SetRebuiltSinceUsed(bool b) { SetFlag(kRebuiltSinceUsed, b); } @@ -224,59 +231,55 @@ class plMetalIndexBufferRef : public plMetalBufferPoolRef bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); } void SetRefTime(uint32_t t) { fRefTime = t; } - + void Release(); - - void Link(plMetalIndexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + + void Link(plMetalIndexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalIndexBufferRef* GetNext() { return (plMetalIndexBufferRef*)fNext; } virtual ~plMetalIndexBufferRef(); - - plMetalIndexBufferRef(): - plMetalBufferPoolRef(), - fCount(0), - fIndex(0), - fRefTime(0), - fLastWriteFrameTime(0), - fOwner(nullptr) { + + plMetalIndexBufferRef() : plMetalBufferPoolRef(), + fCount(0), + fIndex(0), + fRefTime(0), + fLastWriteFrameTime(0), + fOwner(nullptr) + { } }; - class plMetalTextureRef : public plMetalDeviceRef { public: - plBitmap* fOwner; - - int32_t fLevels; - MTL::Texture* fTexture; + plBitmap* fOwner; + + int32_t fLevels; + MTL::Texture* fTexture; MTL::PixelFormat fFormat; - - void Link(plMetalTextureRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalTextureRef* GetNext() { return (plMetalTextureRef*)fNext; } - - plMetalTextureRef() : - plMetalDeviceRef(), - fOwner(nullptr), - fTexture(nullptr), - fLevels(1) + + void Link(plMetalTextureRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalTextureRef* GetNext() { return (plMetalTextureRef*)fNext; } + + plMetalTextureRef() : plMetalDeviceRef(), + fOwner(nullptr), + fTexture(nullptr), + fLevels(1) { } - + virtual ~plMetalTextureRef(); - + void Release(); }; - - -class plMetalRenderTargetRef: public plMetalTextureRef +class plMetalRenderTargetRef : public plMetalTextureRef { public: - MTL::Texture* fDepthBuffer; + MTL::Texture* fDepthBuffer; + + void Link(plMetalRenderTargetRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + 
plMetalRenderTargetRef* GetNext() { return (plMetalRenderTargetRef*)fNext; } - void Link(plMetalRenderTargetRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalRenderTargetRef* GetNext() { return (plMetalRenderTargetRef*)fNext; } - plMetalRenderTargetRef() : fDepthBuffer(nullptr) { } @@ -288,6 +291,4 @@ class plMetalRenderTargetRef: public plMetalTextureRef virtual void SetOwner(plRenderTarget* targ) { fOwner = (plBitmap*)targ; } }; - #endif // _plGLDeviceRef_inc_ - diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp index 2dce9f3382..81233adbdc 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp @@ -39,11 +39,9 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -#include "plPipeline/hsWinRef.h" - -#include "plMetalPipeline.h" #include "plMetalDeviceRef.h" - +#include "plMetalPipeline.h" +#include "plPipeline/hsWinRef.h" #include "plProfile.h" #include "plStatusLog/plStatusLog.h" @@ -51,13 +49,12 @@ plProfile_CreateMemCounter("Vertices", "Memory", MemVertex); plProfile_CreateMemCounter("Indices", "Memory", MemIndex); plProfile_CreateMemCounter("Textures", "Memory", MemTexture); - /***************************************************************************** ** Generic plGLDeviceRef Functions ** *****************************************************************************/ plMetalDeviceRef::plMetalDeviceRef() -: fNext(nullptr), - fBack(nullptr) + : fNext(nullptr), + fBack(nullptr) { } @@ -67,7 +64,8 @@ plMetalDeviceRef::~plMetalDeviceRef() Unlink(); } -void plMetalDeviceRef::Unlink() { +void plMetalDeviceRef::Unlink() +{ hsAssert(fBack, "plGLDeviceRef not in list"); if (fNext) @@ -76,12 +74,12 @@ void plMetalDeviceRef::Unlink() { fBack = nullptr; fNext = nullptr; - } uint32_t plMetalBufferPoolRef::fFrameTime(0); -void 
plMetalDeviceRef::Link(plMetalDeviceRef **back) { +void plMetalDeviceRef::Link(plMetalDeviceRef **back) +{ hsAssert(fNext == nullptr && fBack == nullptr, "Trying to link a plMetalDeviceRef that's already linked"); fNext = *back; @@ -91,7 +89,6 @@ void plMetalDeviceRef::Link(plMetalDeviceRef **back) { *back = this; } - /***************************************************************************** ** Vertex buffer cleanup Functions ** *****************************************************************************/ @@ -103,13 +100,11 @@ plMetalVertexBufferRef::~plMetalVertexBufferRef() Release(); } - void plMetalVertexBufferRef::Release() { SetDirty(true); } - /***************************************************************************** ** Index buffer cleanup Functions ** *****************************************************************************/ @@ -124,7 +119,6 @@ void plMetalIndexBufferRef::Release() SetDirty(true); } - /***************************************************************************** ** Texture cleanup Functions ** *****************************************************************************/ @@ -141,23 +135,23 @@ void plMetalTextureRef::Release() plMetalTextureRef::~plMetalTextureRef() { Release(); - + if (fNext != nullptr || fBack != nullptr) Unlink(); } - /***************************************************************************** ** FrameBuffer cleanup Functions ** *****************************************************************************/ -plMetalRenderTargetRef::~plMetalRenderTargetRef() { +plMetalRenderTargetRef::~plMetalRenderTargetRef() +{ Release(); } void plMetalRenderTargetRef::Release() { - if(fDepthBuffer) { + if (fDepthBuffer) { fDepthBuffer->release(); fDepthBuffer = nullptr; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm index b86372de01..f67485491e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm +++ 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm @@ -47,46 +47,48 @@ #include -#include "plMetalPipeline.h" #include +#include "plMetalPipeline.h" -void plMetalEnumerate::Enumerate(std::vector& records) -{ - //For now - just use the default device. If there is a high power discrete device - this will spin it up. - //This will also automatically pin us to an eGPU if present and the user has configured us to use it. - MTL::Device* device = MTL::CreateSystemDefaultDevice(); - - if (device) { - hsG3DDeviceRecord devRec; - devRec.SetG3DDeviceType(hsG3DDeviceSelector::kDevTypeMetal); - devRec.SetDriverName("Metal"); - devRec.SetDeviceDesc(device->name()->utf8String()); - //Metal has ways to query capabilities, but doesn't expose a flat version - //Populate with the OS version - @autoreleasepool { - NSProcessInfo *processInfo = [NSProcessInfo processInfo]; - NSOperatingSystemVersion version = processInfo.operatingSystemVersion; - NSString *versionString = [NSString stringWithFormat:@"%li.%li.%li", (long)version.majorVersion, (long)version.minorVersion, version.patchVersion]; - devRec.SetDriverVersion([versionString cStringUsingEncoding:NSUTF8StringEncoding]); - } - devRec.SetDriverDesc(device->name()->utf8String()); - - devRec.SetCap(hsG3DDeviceSelector::kCapsMipmap); - devRec.SetCap(hsG3DDeviceSelector::kCapsPerspective); - devRec.SetCap(hsG3DDeviceSelector::kCapsCompressTextures); - devRec.SetCap(hsG3DDeviceSelector::kCapsDoesSmallTextures); - devRec.SetCap(hsG3DDeviceSelector::kCapsPixelShader); - devRec.SetCap(hsG3DDeviceSelector::kCapsHardware); - - devRec.SetLayersAtOnce(8); - - // Just make a fake mode so the device selector will let it through - hsG3DDeviceMode devMode; - devMode.SetWidth(hsG3DDeviceSelector::kDefaultWidth); - devMode.SetHeight(hsG3DDeviceSelector::kDefaultHeight); - devMode.SetColorDepth(hsG3DDeviceSelector::kDefaultDepth); - devRec.GetModes().emplace_back(devMode); - - records.emplace_back(devRec); +void 
plMetalEnumerate::Enumerate(std::vector& records) { + // For now - just use the default device. If there is a high power discrete device - this will + // spin it up. This will also automatically pin us to an eGPU if present and the user has + // configured us to use it. + MTL::Device* device = MTL::CreateSystemDefaultDevice(); + + if (device) { + hsG3DDeviceRecord devRec; + devRec.SetG3DDeviceType(hsG3DDeviceSelector::kDevTypeMetal); + devRec.SetDriverName("Metal"); + devRec.SetDeviceDesc(device->name()->utf8String()); + // Metal has ways to query capabilities, but doesn't expose a flat version + // Populate with the OS version + @autoreleasepool { + NSProcessInfo* processInfo = [NSProcessInfo processInfo]; + NSOperatingSystemVersion version = processInfo.operatingSystemVersion; + NSString* versionString = + [NSString stringWithFormat:@"%li.%li.%li", (long)version.majorVersion, + (long)version.minorVersion, version.patchVersion]; + devRec.SetDriverVersion([versionString cStringUsingEncoding:NSUTF8StringEncoding]); } + devRec.SetDriverDesc(device->name()->utf8String()); + + devRec.SetCap(hsG3DDeviceSelector::kCapsMipmap); + devRec.SetCap(hsG3DDeviceSelector::kCapsPerspective); + devRec.SetCap(hsG3DDeviceSelector::kCapsCompressTextures); + devRec.SetCap(hsG3DDeviceSelector::kCapsDoesSmallTextures); + devRec.SetCap(hsG3DDeviceSelector::kCapsPixelShader); + devRec.SetCap(hsG3DDeviceSelector::kCapsHardware); + + devRec.SetLayersAtOnce(8); + + // Just make a fake mode so the device selector will let it through + hsG3DDeviceMode devMode; + devMode.SetWidth(hsG3DDeviceSelector::kDefaultWidth); + devMode.SetHeight(hsG3DDeviceSelector::kDefaultHeight); + devMode.SetColorDepth(hsG3DDeviceSelector::kDefaultDepth); + devRec.GetModes().emplace_back(devMode); + + records.emplace_back(devRec); + } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp index c229751f7f..a3f84bf7ff 
100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp @@ -39,20 +39,18 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -#include "HeadSpin.h" -#include "hsWindows.h" - -#include - #include "plMetalFragmentShader.h" -#include "plSurface/plShader.h" +#include +#include "HeadSpin.h" +#include "hsWindows.h" #include "plDrawable/plGBufferGroup.h" #include "plMetalPipeline.h" +#include "plSurface/plShader.h" plMetalFragmentShader::plMetalFragmentShader(plShader* owner) -: plMetalShader(owner) + : plMetalShader(owner) { } @@ -65,17 +63,15 @@ void plMetalFragmentShader::Release() { fPipe = nil; - //ISetError(nil); + // ISetError(nil); } bool plMetalFragmentShader::ISetConstants(plMetalPipeline* pipe) { - if( fOwner->GetNumConsts() ) - { - float *ptr = (float *)fOwner->GetConstBasePtr(); - pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); + if (fOwner->GetNumConsts()) { + float* ptr = (float*)fOwner->GetConstBasePtr(); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); } return true; } - diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h index e9f3fd3f97..d2ac66e265 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h @@ -51,16 +51,14 @@ class plMetalPipeline; class plMetalFragmentShader : public plMetalShader { protected: - - public: - virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. + virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. 
plMetalFragmentShader(plShader* owner); virtual ~plMetalFragmentShader(); - virtual void Release(); - void Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalFragmentShader* GetNext() { return (plMetalFragmentShader*)fNext; } + virtual void Release(); + void Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalFragmentShader* GetNext() { return (plMetalFragmentShader*)fNext; } }; #endif // plMetalFragmentShader_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index c4ef73a418..44d9135ad1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -40,32 +40,29 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com *==LICENSE==*/ -#include #include "plMetalMaterialShaderRef.h" +#include + #include "HeadSpin.h" #include "hsBitVector.h" - +#include "hsGMatState.inl" #include "plDrawable/plGBufferGroup.h" -#include "plGImage/plMipmap.h" #include "plGImage/plCubicEnvironmap.h" -#include "plPipeline.h" +#include "plGImage/plMipmap.h" +#include "plMetalDevice.h" +#include "plMetalPipeline.h" #include "plPipeDebugFlags.h" +#include "plPipeline.h" #include "plPipeline/plCubicRenderTarget.h" #include "plPipeline/plRenderTarget.h" #include "plSurface/hsGMaterial.h" #include "plSurface/plLayerInterface.h" -#include "hsGMatState.inl" - -#include "plMetalDevice.h" -#include "plMetalPipeline.h" - -plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline *pipe) : -fPipeline { pipe }, -fMaterial { mat }, -fFragFunction(), -fNumPasses(0) +plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe) : fPipeline{pipe}, + fMaterial{mat}, + fFragFunction(), + fNumPasses(0) { fDevice = pipe->fDevice.fMetalDevice; 
fFragFunction = pipe->fFragFunction; @@ -79,18 +76,18 @@ plMetalMaterialShaderRef::~plMetalMaterialShaderRef() void plMetalMaterialShaderRef::Release() { - for(auto & buffer : fPassArgumentBuffers) { + for (auto& buffer : fPassArgumentBuffers) { buffer->release(); buffer = nil; } fPassArgumentBuffers.clear(); - + fNumPasses = 0; } void plMetalMaterialShaderRef::CheckMateralRef() { - if(IsDirty()) { + if (IsDirty()) { /* Something (like avatars) might have modified our textures. If we're dirty - clear all cached state. @@ -99,15 +96,15 @@ void plMetalMaterialShaderRef::CheckMateralRef() fPassIndices.clear(); fPassLengths.clear(); fFragmentShaderDescriptions.clear(); - - for(MTL::Buffer* buffer: fPassArgumentBuffers) { + + for (MTL::Buffer* buffer : fPassArgumentBuffers) { buffer->release(); } fPassArgumentBuffers.clear(); } - if(fNumPasses == 0) { + if (fNumPasses == 0) { ILoopOverLayers(); - + for (size_t i = 0; i < fMaterial->GetNumLayers(); i++) { plLayerInterface* layer = fMaterial->GetLayer(i); if (!layer) { @@ -120,80 +117,81 @@ void plMetalMaterialShaderRef::CheckMateralRef() SetDirty(false); } -//fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on the GPU so it's much faster. Use this encoder if there are no piggybacks or pushover layers -void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass) +// fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on the GPU so it's much faster. 
Use this encoder if there are no piggybacks or pushover layers +void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass) { for (uint32_t i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { plLayerInterface* layer = fMaterial->GetLayer(i); - + if (!layer) { continue; } fPipeline->CheckTextureRef(layer); - + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); if (!img) { continue; } - + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); - //if (!texRef->fTexture) { - // continue; - //} - + // if (!texRef->fTexture) { + // continue; + // } + assert(i - GetPassIndex(pass) >= 0); EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer); } - + encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, FragmentShaderArgumentUniforms); } -void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription* passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) +void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass, plMetalFragmentShaderDescription* passDescription, std::vector* piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) { - std::vector layers = GetLayersForPass(pass); - - if(piggyBacks) { + + if (piggyBacks) { layers.insert(layers.end(), piggyBacks->begin(), piggyBacks->end()); } - + plMetalFragmentShaderArgumentBuffer uniforms; - - IHandleMaterial(GetPassIndex(pass), passDescription, &uniforms, piggyBacks, - [&](plLayerInterface* layer, uint32_t index) { + + IHandleMaterial( + GetPassIndex(pass), passDescription, &uniforms, piggyBacks, + [&](plLayerInterface* layer, uint32_t index) { layer = preEncodeTransform(layer, 
index); IBuildLayerTexture(encoder, index, layer); - + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); - + assert(index - GetPassIndex(pass) >= 0); EncodeTransform(layer, &vertexUniforms->uvTransforms[index]); - + return layer; - }, [&](plLayerInterface* layer, uint32_t index) { + }, + [&](plLayerInterface* layer, uint32_t index) { layer = postEncodeTransform(layer, index); return layer; }); - + encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), FragmentShaderArgumentUniforms); } -void plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform) { +void plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDescriptor* transform) +{ matrix_float4x4 tXfm; hsMatrix2SIMD(layer->GetTransform(), &tXfm); transform->transform = tXfm; transform->UVWSrc = layer->GetUVWSrc(); } -//This is old - supporting the plate code. -//FIXME: Replace the plate codes path to texturing -void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass) +// This is old - supporting the plate code. 
+// FIXME: Replace the plate codes path to texturing +void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder* encoder, uint pass) { - plLayerInterface* layer = fMaterial->GetLayer(pass); if (!layer) { return; @@ -212,10 +210,10 @@ void plMetalMaterialShaderRef::prepareTextures(MTL::RenderCommandEncoder *encode if (!texRef->fTexture) { return; } - + if (plCubicEnvironmap::ConvertNoRef(layer->GetTexture()) != nullptr) { } else if (plMipmap::ConvertNoRef(layer->GetTexture()) != nullptr || plRenderTarget::ConvertNoRef(layer->GetTexture()) != nullptr) { - encoder->setFragmentTexture(texRef->fTexture, FragmentShaderArgumentTexture); + encoder->setFragmentTexture(texRef->fTexture, FragmentShaderArgumentTexture); } } @@ -223,53 +221,53 @@ void plMetalMaterialShaderRef::ILoopOverLayers() { uint32_t pass = 0; - for (uint32_t j = 0; j < fMaterial->GetNumLayers(); ) - { + for (uint32_t j = 0; j < fMaterial->GetNumLayers();) { uint32_t currLayer = j; - - //Create "fast encode" buffers - //Fast encode can be used when there are no piggybacks or pushover layers. We'll load as much of the - //base state of this layer as we can onto the GPU. Using fast encode, the renderer can avoid encoding - //a lot of the render state, it will be on the GPU already. - //I'd like to encode more data here, and use a heap. The heap hasn't happened yet because heaps are - //private memory, and we don't have a window yet for a blit phase into private memory. - MTL::Buffer *argumentBuffer = fDevice->newBuffer(sizeof(plMetalFragmentShaderArgumentBuffer), MTL::ResourceStorageModeManaged); - - plMetalFragmentShaderArgumentBuffer *layerBuffer = (plMetalFragmentShaderArgumentBuffer *)argumentBuffer->contents(); - + + // Create "fast encode" buffers + // Fast encode can be used when there are no piggybacks or pushover layers. We'll load as much of the + // base state of this layer as we can onto the GPU. 
Using fast encode, the renderer can avoid encoding + // a lot of the render state, it will be on the GPU already. + // I'd like to encode more data here, and use a heap. The heap hasn't happened yet because heaps are + // private memory, and we don't have a window yet for a blit phase into private memory. + MTL::Buffer* argumentBuffer = fDevice->newBuffer(sizeof(plMetalFragmentShaderArgumentBuffer), MTL::ResourceStorageModeManaged); + + plMetalFragmentShaderArgumentBuffer* layerBuffer = (plMetalFragmentShaderArgumentBuffer*)argumentBuffer->contents(); + plMetalFragmentShaderDescription passDescription; - - j = IHandleMaterial(currLayer, &passDescription, layerBuffer, nullptr, - [](plLayerInterface* layer, uint32_t index) { - return layer; - }, - [](plLayerInterface* layer, uint32_t index) { - return layer; - }); + + j = IHandleMaterial( + currLayer, &passDescription, layerBuffer, nullptr, + [](plLayerInterface* layer, uint32_t index) { + return layer; + }, + [](plLayerInterface* layer, uint32_t index) { + return layer; + }); if (j == -1) break; - + passDescription.CacheHash(); fFragmentShaderDescriptions.push_back(passDescription); - + std::vector layers(j); - + pass++; - - //encode the colors for this pass into our buffer for fast rendering - for(int layerOffset = 0; layerOffset < j - currLayer; layerOffset ++) { + + // encode the colors for this pass into our buffer for fast rendering + for (int layerOffset = 0; layerOffset < j - currLayer; layerOffset++) { plLayerInterface* layer = fMaterial->GetLayer(currLayer + layerOffset); layers[layerOffset] = layer; IBuildLayerTexture(NULL, layerOffset, layer); } - + fPasses.push_back(layers); - + argumentBuffer->didModifyRange(NS::Range(0, argumentBuffer->length())); - + fPassArgumentBuffers.push_back(argumentBuffer); - + fPassIndices.push_back(currLayer); fPassLengths.push_back(j - currLayer); fNumPasses++; @@ -287,19 +285,19 @@ const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerIn return 
state; } -void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer) +void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer) { // Reminder: Encoder is allowed to be null when Plasma is precompiling pipeline states // Metal needs to know if a shader is 2D or Cubic to compile shaders // A null encoder signifies we should build the texture but not bind state - + fPipeline->CheckTextureRef(layer); plBitmap* texture = layer->GetTexture(); - + if (texture != nullptr && encoder) { - plMetalTextureRef *deviceTexture = (plMetalTextureRef *)texture->GetDeviceRef(); - if(!deviceTexture) { - //FIXME: Better way to address missing textures than null pointers + plMetalTextureRef* deviceTexture = (plMetalTextureRef*)texture->GetDeviceRef(); + if (!deviceTexture) { + // FIXME: Better way to address missing textures than null pointers encoder->setFragmentTexture(nullptr, FragmentShaderArgumentAttributeCubicTextures + offsetFromRootLayer); encoder->setFragmentTexture(nullptr, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer); return; @@ -310,12 +308,11 @@ void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder *enc } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { encoder->setFragmentTexture(deviceTexture->fTexture, FragmentShaderArgumentAttributeTextures + offsetFromRootLayer); } - - if (fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag != layer->GetClampFlags()) - { + + if (fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag != layer->GetClampFlags()) { MTL::SamplerState* samplerState = fPipeline->fDevice.SampleStateForClampFlags(hsGMatState::hsGMatClampFlags(layer->GetClampFlags())); encoder->setFragmentSamplerState(samplerState, offsetFromRootLayer); - + 
fPipeline->fState.layerStates[offsetFromRootLayer].clampFlag = hsGMatState::hsGMatClampFlags(layer->GetClampFlags()); } } @@ -351,7 +348,7 @@ uint32_t plMetalMaterialShaderRef::ILayersAtOnce(uint32_t which) // Ignoring max UVW limit - if ((lay->GetMiscFlags() & hsGMatState::kMiscBindNext) && (i+1 >= maxLayers)) { + if ((lay->GetMiscFlags() & hsGMatState::kMiscBindNext) && (i + 1 >= maxLayers)) { break; } @@ -392,7 +389,7 @@ bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay) return true; } -uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform) +uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription* passDescription, plMetalFragmentShaderArgumentBuffer* uniforms, std::vector* piggybacks, std::function preEncodeTransform, std::function postEncodeTransform) { if (!fMaterial || layer >= fMaterial->GetNumLayers() || !fMaterial->GetLayer(layer)) { return -1; @@ -401,7 +398,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme if (false /*ISkipBumpMap(fMaterial, layer)*/) { return -1; } - + memset(passDescription, 0, sizeof(plMetalFragmentShaderDescription)); // Ignoring the bit about ATI Radeon and UVW limits @@ -419,7 +416,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme currLay = fMaterial->GetLayer(++layer); } - //currLay = IPushOverAllLayer(currLay); + // currLay = IPushOverAllLayer(currLay); hsGMatState state = ICompositeLayerState(currLay); @@ -433,21 +430,18 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme state.fBlendFlags &= ~hsGMatState::kBlendMask; } - if ((fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (state.fMiscFlags & 
hsGMatState::kMiscBumpChans) ) { - switch (state.fMiscFlags & hsGMatState::kMiscBumpChans) - { + if ((fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpUV) || fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW)) && (state.fMiscFlags & hsGMatState::kMiscBumpChans)) { + switch (state.fMiscFlags & hsGMatState::kMiscBumpChans) { case hsGMatState::kMiscBumpDu: break; case hsGMatState::kMiscBumpDv: - if (!(fMaterial->GetLayer(layer-2)->GetBlendFlags() & hsGMatState::kBlendAdd)) - { + if (!(fMaterial->GetLayer(layer - 2)->GetBlendFlags() & hsGMatState::kBlendAdd)) { state.fBlendFlags &= ~hsGMatState::kBlendMask; state.fBlendFlags |= hsGMatState::kBlendMADD; } break; case hsGMatState::kMiscBumpDw: - if (!(fMaterial->GetLayer(layer-1)->GetBlendFlags() & hsGMatState::kBlendAdd)) - { + if (!(fMaterial->GetLayer(layer - 1)->GetBlendFlags() & hsGMatState::kBlendAdd)) { state.fBlendFlags &= ~hsGMatState::kBlendMask; state.fBlendFlags |= hsGMatState::kBlendMADD; } @@ -460,61 +454,57 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme uint32_t currNumLayers = ILayersAtOnce(layer); if (state.fMiscFlags & (hsGMatState::kMiscBumpDu | hsGMatState::kMiscBumpDw)) { - //ISetBumpMatrices(currLay); + // ISetBumpMatrices(currLay); } - + passDescription->Populate(currLay, 0); - + postEncodeTransform(currLay, 0); - + int32_t i = 1; - for (i = 1; i < currNumLayers; i++) - { + for (i = 1; i < currNumLayers; i++) { plLayerInterface* layPtr = fMaterial->GetLayer(layer + i); if (!layPtr) { return -1; } layPtr = preEncodeTransform(layPtr, i); - - passDescription->Populate(layPtr, i); - + + passDescription->Populate(layPtr, i); + layPtr = postEncodeTransform(layPtr, i); } - - if(piggybacks) { - for (int32_t currPiggyback = 0; currPiggyback < piggybacks->size(); currPiggyback++) - { + if (piggybacks) { + for (int32_t currPiggyback = 0; currPiggyback < piggybacks->size(); currPiggyback++) { plLayerInterface* layPtr = piggybacks->at(currPiggyback); if (!layPtr) { return -1; } 
layPtr = preEncodeTransform(layPtr, i + currPiggyback); - + passDescription->Populate(layPtr, i + currPiggyback); - + layPtr = postEncodeTransform(layPtr, i + currPiggyback); } } - - passDescription->numLayers = ( piggybacks ? piggybacks->size() : 0 ) + currNumLayers; - + + passDescription->numLayers = (piggybacks ? piggybacks->size() : 0) + currNumLayers; + if (state.fBlendFlags & (hsGMatState::kBlendTest | hsGMatState::kBlendAlpha | hsGMatState::kBlendAddColorTimesAlpha) && - !(state.fBlendFlags & hsGMatState::kBlendAlphaAlways)) - { + !(state.fBlendFlags & hsGMatState::kBlendAlphaAlways)) { // AlphaTestHigh is used for reducing sort artifacts on textures that // are mostly opaque or transparent, but have regions of translucency // in transition. Like a texture for a bush billboard. It lets there be // some transparency falloff, but quit drawing before it gets so // transparent that draw order problems (halos) become apparent. if (state.fBlendFlags & hsGMatState::kBlendAlphaTestHigh) { - uniforms->alphaThreshold = 64.f/255.f; + uniforms->alphaThreshold = 64.f / 255.f; } else { - uniforms->alphaThreshold = 1.f/255.f; + uniforms->alphaThreshold = 1.f / 255.f; } } else { uniforms->alphaThreshold = 0.f; } - + return layer + currNumLayers; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 97d69b2585..d2e3a8fe88 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -42,14 +42,14 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #ifndef _plMetalMaterialShaderRef_inc_ #define _plMetalMaterialShaderRef_inc_ +#include +#include + +#include "ShaderTypes.h" #include "hsGMatState.h" #include "plMetalDeviceRef.h" -#include "ShaderTypes.h" #include "plMetalPipelineState.h" -#include -#include - class hsGMaterial; class plMetalPipeline; class plLayerInterface; @@ -57,57 +57,60 @@ class plLayerInterface; class plMetalMaterialShaderRef : public plMetalDeviceRef { protected: - plMetalPipeline* fPipeline; - hsGMaterial* fMaterial; - //temporary holder for the fragment shader to use, we don't own this reference - MTL::Function* fFragFunction; + plMetalPipeline *fPipeline; + hsGMaterial *fMaterial; + // temporary holder for the fragment shader to use, we don't own this reference + MTL::Function *fFragFunction; + private: - std::vector fPassIndices; - //FIXME: This should be retained/released - MTL::Device* fDevice; - std::vector fPassArgumentBuffers; - + std::vector fPassIndices; + // FIXME: This should be retained/released + MTL::Device *fDevice; + std::vector fPassArgumentBuffers; + public: - void Link(plMetalMaterialShaderRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalMaterialShaderRef* GetNext() { return (plMetalMaterialShaderRef*)fNext; } - - plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline *pipe); + void Link(plMetalMaterialShaderRef **back) { plMetalDeviceRef::Link((plMetalDeviceRef **)back); } + plMetalMaterialShaderRef *GetNext() { return (plMetalMaterialShaderRef *)fNext; } + + plMetalMaterialShaderRef(hsGMaterial *mat, plMetalPipeline *pipe); ~plMetalMaterialShaderRef(); - + void Release(); void CheckMateralRef(); - + uint32_t GetNumPasses() const { return fNumPasses; } - - uint32_t GetPassIndex(size_t which) const { return fPassIndices[which]; } + + uint32_t GetPassIndex(size_t which) const { return fPassIndices[which]; } const std::vector GetLayersForPass(size_t pass) { return fPasses[pass]; } - - void 
EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription *passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); - void FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass); - //probably not a good idea to call prepareTextures directly - //mostly just a hack to keep plates working for now - void prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass); - std::vector fPassLengths; - + + void EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription *passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); + void FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass); + // probably not a good idea to call prepareTextures directly + // mostly just a hack to keep plates working for now + void prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass); + std::vector fPassLengths; + // Set the current Plasma state based on the input layer state and the material overrides. // fMatOverOn overrides to set a state bit whether it is set in the layer or not. 
// fMatOverOff overrides to clear a state bit whether it is set in the layer or not.s - const hsGMatState ICompositeLayerState(const plLayerInterface* layer); - - const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) { + const hsGMatState ICompositeLayerState(const plLayerInterface *layer); + + const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) + { return fFragmentShaderDescriptions[which]; } + private: void ILoopOverLayers(); - + uint32_t fNumPasses; - uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform); - bool ICanEatLayer(plLayerInterface* lay); + uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform); + bool ICanEatLayer(plLayerInterface *lay); uint32_t ILayersAtOnce(uint32_t which); - - void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); - void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); - std::vector> fPasses; + + void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface *layer); + void EncodeTransform(plLayerInterface *layer, UVOutDescriptor *transform); + std::vector> fPasses; std::vector fFragmentShaderDescriptions; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 53aa2b437e..7afbec5536 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -39,59 +39,50 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -#include "HeadSpin.h" -#include +#include "plMetalPipeline.h" -#include #import -#include "plQuality.h" - -#include "plMetalPipeline.h" -#include "plMetalMaterialShaderRef.h" -#include "plMetalPlateManager.h" -#include "plMetalPipelineState.h" +#include +#include +#include "HeadSpin.h" +#include "hsGMatState.inl" #include "hsTimer.h" -#include "plPipeDebugFlags.h" -#include "plPipeResReq.h" - -#include "pnNetCommon/plNetApp.h" // for dbg logging -#include "pnMessage/plPipeResMakeMsg.h" +#include "pfCamera/plVirtualCamNeu.h" #include "plAvatar/plAvatarClothing.h" +#include "plDrawable/plAuxSpan.h" #include "plDrawable/plDrawableSpans.h" #include "plDrawable/plGBufferGroup.h" +#include "plGImage/plCubicEnvironmap.h" #include "plGImage/plMipmap.h" #include "plGLight/plLightInfo.h" +#include "plGLight/plShadowCaster.h" +#include "plGLight/plShadowSlave.h" +#include "plMessage/plDeviceRecreateMsg.h" +#include "plMetalFragmentShader.h" +#include "plMetalMaterialShaderRef.h" +#include "plMetalPipelineState.h" +#include "plMetalPlateManager.h" +#include "plMetalTextFont.h" +#include "plMetalVertexShader.h" +#include "plPipeDebugFlags.h" +#include "plPipeResReq.h" #include "plPipeline/plCubicRenderTarget.h" #include "plPipeline/plDebugText.h" #include "plPipeline/plDynamicEnvMap.h" +#include "plProfile.h" +#include "plQuality.h" #include "plScene/plRenderRequest.h" #include "plSurface/hsGMaterial.h" #include "plSurface/plLayer.h" -#include "pfCamera/plVirtualCamNeu.h" -#include "plMessage/plDeviceRecreateMsg.h" -#include "plgDispatch.h" -#include "plDrawable/plAuxSpan.h" #include "plSurface/plLayerShadowBase.h" -#include "plMetalTextFont.h" - -#include "plGImage/plMipmap.h" -#include "plGImage/plCubicEnvironmap.h" - -#include "plGLight/plShadowSlave.h" -#include "plGLight/plShadowCaster.h" - #include "plTweak.h" +#include "plgDispatch.h" +#include "pnMessage/plPipeResMakeMsg.h" +#include "pnNetCommon/plNetApp.h" // for 
dbg logging -#include "plMetalVertexShader.h" -#include "plMetalFragmentShader.h" - -#include "hsGMatState.inl" - -#include "plProfile.h" - -uint32_t fDbgSetupInitFlags; // HACK temp only +uint32_t fDbgSetupInitFlags; // HACK temp only plProfile_CreateCounter("Feed Triangles", "Draw", DrawFeedTriangles); plProfile_CreateCounter("Draw Prim Static", "Draw", DrawPrimStatic); @@ -130,16 +121,22 @@ plMetalEnumerate plMetalPipeline::enumerator; class plRenderTriListFunc : public plRenderPrimFunc { protected: - plMetalDevice* fDevice; - int fBaseVertexIndex; - int fVStart; - int fVLength; - int fIStart; - int fNumTris; + plMetalDevice* fDevice; + int fBaseVertexIndex; + int fVStart; + int fVLength; + int fIStart; + int fNumTris; + public: plRenderTriListFunc(plMetalDevice* device, int baseVertexIndex, int vStart, int vLength, int iStart, int iNumTris) - : fDevice(device), fBaseVertexIndex(baseVertexIndex), fVStart(vStart), fVLength(vLength), fIStart(iStart), fNumTris(iNumTris) {} + : fDevice(device), + fBaseVertexIndex(baseVertexIndex), + fVStart(vStart), + fVLength(vLength), + fIStart(iStart), + fNumTris(iNumTris) {} bool RenderPrims() const override; }; @@ -149,48 +146,55 @@ bool plRenderTriListFunc::RenderPrims() const plProfile_IncCount(DrawFeedTriangles, fNumTris); plProfile_IncCount(DrawTriangles, fNumTris); plProfile_Inc(DrawPrimStatic); - + size_t uniformsSize = offsetof(VertexUniforms, uvTransforms) + sizeof(UVOutDescriptor) * fDevice->fPipeline->fCurrNumLayers; - fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); - + fDevice->CurrentRenderCommandEncoder()->setVertexBytes(fDevice->fPipeline->fCurrentRenderPassUniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); + plMetalLights* lights = &fDevice->fPipeline->fLights; - size_t lightSize = offsetof(plMetalLights, lampSources) + (sizeof(plMetalShaderLightSource) * 
lights->count); - + size_t lightSize = offsetof(plMetalLights, lampSources) + (sizeof(plMetalShaderLightSource) * lights->count); + fDevice->CurrentRenderCommandEncoder()->setVertexBytes(lights, sizeof(plMetalLights), VertexShaderArgumentLights); fDevice->CurrentRenderCommandEncoder()->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, fNumTris, MTL::IndexTypeUInt16, fDevice->fCurrentIndexBuffer, (sizeof(uint16_t) * fIStart)); } - - -plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), fCurrentRenderPassUniforms(nullptr), currentDrawableCallback(nullptr), fFragFunction(nullptr), fVShaderRefList(nullptr), fPShaderRefList(nullptr), fULutTextureRef(nullptr), fCurrRenderLayer() +plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord* devMode) : pl3DPipeline(devMode), + fRenderTargetRefList(), + fMatRefList(), + fCurrentRenderPassUniforms(nullptr), + currentDrawableCallback(nullptr), + fFragFunction(nullptr), + fVShaderRefList(nullptr), + fPShaderRefList(nullptr), + fULutTextureRef(nullptr), + fCurrRenderLayer() { fTextureRefList = nullptr; fVtxBuffRefList = nullptr; fIdxBuffRefList = nullptr; fMatRefList = nullptr; fTextFontRefList = nullptr; - + fCurrLayerIdx = 0; fDevice.fPipeline = this; - + fMaxLayersAtOnce = 8; - + // Default our output format to 8 bit BGRA. Client may immediately change this to // the actual framebuffer format. SetFramebufferFormat(MTL::PixelFormatBGRA8Unorm); - + // Alloc half our simultaneous textures to piggybacks. // Won't hurt us unless we try to many things at once. 
fMaxPiggyBacks = fMaxLayersAtOnce >> 1; - + // Metal is always PS3 capable plQuality::SetCapability(plQuality::kPS_3); - + fDevice.SetMaxAnsiotropy(fInitialPipeParams.AnisotropicLevel); fDevice.SetMSAASampleCount(fInitialPipeParams.AntiAliasingAmount); - - fCurrentRenderPassUniforms = (VertexUniforms *) calloc(sizeof(VertexUniforms), sizeof(char)); - + + fCurrentRenderPassUniforms = (VertexUniforms*)calloc(sizeof(VertexUniforms), sizeof(char)); + // RenderTarget pools are shared for our shadow generation algorithm. // Different sizes for different resolutions. ICreateDeviceObjects(); @@ -200,19 +204,19 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons plMetalPipeline::~plMetalPipeline() { - if (plMetalPlateManager* pm = static_cast(fPlateMgr)) - { + if (plMetalPlateManager* pm = static_cast(fPlateMgr)) { pm->IReleaseGeometry(); } } -void plMetalPipeline::ICreateDeviceObjects() { +void plMetalPipeline::ICreateDeviceObjects() +{ fPlateMgr = new plMetalPlateManager(this); } -bool plMetalPipeline::PreRender(plDrawable *drawable, std::vector &visList, plVisMgr *visMgr) +bool plMetalPipeline::PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr) { - plDrawableSpans *ds = plDrawableSpans::ConvertNoRef(drawable); + plDrawableSpans* ds = plDrawableSpans::ConvertNoRef(drawable); if (!ds) { return false; } @@ -226,7 +230,7 @@ bool plMetalPipeline::PreRender(plDrawable *drawable, std::vector &visL return visList.size() > 0; } -bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector &visList, plVisMgr *visMgr) +bool plMetalPipeline::PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr) { plProfile_BeginTiming(PrepDrawable); @@ -248,7 +252,7 @@ bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector & // do any last minute updates for its buffers, including // generating particle tri lists. 
ice->PrepForRender(this); - + // Any skinning necessary if (!ISoftwareVertexBlend(ice, visList)) { plProfile_EndTiming(PrepDrawable); @@ -260,21 +264,23 @@ bool plMetalPipeline::PrepForRender(plDrawable *drawable, std::vector & return true; } -plTextFont *plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) { - plTextFont *font = new plMetalTextFont( this, &fDevice ); - font->Create( face, size ); - font->Link( &fTextFontRefList ); +plTextFont* plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) +{ + plTextFont* font = new plMetalTextFont(this, &fDevice); + font->Create(face, size); + font->Link(&fTextFontRefList); return font; } -bool plMetalPipeline::OpenAccess(plAccessSpan &dst, plDrawableSpans *d, const plVertexSpan *span, bool readOnly) { - //FIXME: Whats this? +bool plMetalPipeline::OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) +{ + // FIXME: Whats this? return false; } -bool plMetalPipeline::CloseAccess(plAccessSpan &acc) { return false; } +bool plMetalPipeline::CloseAccess(plAccessSpan& acc) { return false; } -void plMetalPipeline::PushRenderRequest(plRenderRequest *req) +void plMetalPipeline::PushRenderRequest(plRenderRequest* req) { // Save these, since we want to copy them to our current view hsMatrix44 l2w = fView.GetLocalToWorld(); @@ -315,16 +321,16 @@ void plMetalPipeline::PushRenderRequest(plRenderRequest *req) fState.Reset(); } -void plMetalPipeline::PopRenderRequest(plRenderRequest *req) +void plMetalPipeline::PopRenderRequest(plRenderRequest* req) { if (req->GetOverrideMat()) { PopOverrideMaterial(nil); } - - //new render target means we can't use the previous pipeline state - //it won't be set yet on the new target - //in theory we could have a stack of these so when we unwind we - //could get the state back. 
+ + // new render target means we can't use the previous pipeline state + // it won't be set yet on the new target + // in theory we could have a stack of these so when we unwind we + // could get the state back. fState.Reset(); hsRefCnt_SafeUnRef(fView.fRenderRequest); @@ -335,39 +341,39 @@ void plMetalPipeline::PopRenderRequest(plRenderRequest *req) fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera; } -plRenderTarget* plMetalPipeline::PopRenderTarget() { +plRenderTarget* plMetalPipeline::PopRenderTarget() +{ pl3DPipeline::PopRenderTarget(); fState.Reset(); } -void plMetalPipeline::ClearRenderTarget(plDrawable *d) +void plMetalPipeline::ClearRenderTarget(plDrawable* d) { plDrawableSpans* src = plDrawableSpans::ConvertNoRef(d); - if( !src ) - { + if (!src) { ClearRenderTarget(); return; } - + Draw(d); } -void plMetalPipeline::ClearRenderTarget(const hsColorRGBA *col, const float *depth) +void plMetalPipeline::ClearRenderTarget(const hsColorRGBA* col, const float* depth) { if (fView.fRenderState & (kRenderClearColor | kRenderClearDepth)) { hsColorRGBA clearColor = col ? *col : GetClearColor(); - float clearDepth = depth ? *depth : fView.GetClearDepth(); + float clearDepth = depth ? 
*depth : fView.GetClearDepth(); fDevice.Clear(fView.fRenderState & kRenderClearColor, {clearColor.r, clearColor.g, clearColor.b, clearColor.a}, fView.fRenderState & kRenderClearDepth, 1.0); fState.Reset(); } } -hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) +hsGDeviceRef* plMetalPipeline::MakeRenderTargetRef(plRenderTarget* owner) { plMetalRenderTargetRef* ref = nullptr; - MTL::Texture *depthBuffer = nullptr; - plCubicRenderTarget *cubicRT; + MTL::Texture* depthBuffer = nullptr; + plCubicRenderTarget* cubicRT; // If we have Shader Model 3 and support non-POT textures, let's make reflections the pipe size if (plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner)) { @@ -384,68 +390,62 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) // If we already have a rendertargetref, we just need it filled out with D3D resources. if (owner->GetDeviceRef()) ref = (plMetalRenderTargetRef*)owner->GetDeviceRef(); - + /// Create the render target now // Start with the depth surface. // Note that we only ever give a cubic rendertarget a single shared depth buffer, // since we only render one face at a time. If we were rendering part of face X, then part // of face Y, then more of face X, then they would all need their own depth buffers. 
if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) { - MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + MTL::TextureDescriptor* depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, owner->GetWidth(), owner->GetHeight(), false); if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { // on Apple Silicon GPUs - don't allocate memory to back the render target // this assumes the render target only needs to survive this render pass - //FIXME: Do we need to promise the output survives the render pass? + // FIXME: Do we need to promise the output survives the render pass? depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); - } else { + } else { depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor); } - // See if it's a cubic render target. // Primary consumer here is the vertex/pixel shader water. 
- cubicRT = plCubicRenderTarget::ConvertNoRef( owner ); - if( cubicRT ) - { + cubicRT = plCubicRenderTarget::ConvertNoRef(owner); + if (cubicRT) { if (!ref) ref = new plMetalRenderTargetRef(); - - MTL::TextureDescriptor *textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), false); + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), false); textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsagePixelFormatView); textureDescriptor->setStorageMode(MTL::StorageModePrivate); - - plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef(); - MTL::Texture * texture = fDevice.fMetalDevice->newTexture(textureDescriptor); - + + plMetalDeviceRef* device = (plMetalDeviceRef*)owner->GetDeviceRef(); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + /// Create a CUBIC texture - for( int i = 0; i < 6; i++ ) - { - plRenderTarget *face = cubicRT->GetFace( i ); - plMetalRenderTargetRef *fRef; - - if( face->GetDeviceRef() != nil ) - { - fRef = (plMetalRenderTargetRef *)face->GetDeviceRef(); - if( !fRef->IsLinked() ) - fRef->Link( &fRenderTargetRefList ); - } - else - { + for (int i = 0; i < 6; i++) { + plRenderTarget* face = cubicRT->GetFace(i); + plMetalRenderTargetRef* fRef; + + if (face->GetDeviceRef() != nil) { + fRef = (plMetalRenderTargetRef*)face->GetDeviceRef(); + if (!fRef->IsLinked()) + fRef->Link(&fRenderTargetRefList); + } else { fRef = new plMetalRenderTargetRef(); - + face->SetDeviceRef(fRef); - ( (plMetalRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList ); + ((plMetalRenderTargetRef*)face->GetDeviceRef())->Link(&fRenderTargetRefList); // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) - hsRefCnt_SafeUnRef( face->GetDeviceRef() ); + hsRefCnt_SafeUnRef(face->GetDeviceRef()); 
} - - //in since the root texture has changed reload all the face textures + + // in since the root texture has changed reload all the face textures static const uint kFaceMapping[] = { 1, // kLeftFace 0, // kRightFace @@ -454,32 +454,32 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) 2, // kTopFace 3 // kBottomFace }; - - if(fRef->fTexture) { + + if (fRef->fTexture) { fRef->fTexture->release(); fRef->fTexture = nullptr; } - - if(fRef->fDepthBuffer) { + + if (fRef->fDepthBuffer) { fRef->fDepthBuffer->release(); fRef->fDepthBuffer = nullptr; } - + fRef->fTexture = texture->newTextureView(MTL::PixelFormatBGRA8Unorm, MTL::TextureType2D, NS::Range::Make(0, 1), NS::Range::Make(kFaceMapping[i], 1)); - //in since the depth buffer is shared each render target gets their own retain + // in since the depth buffer is shared each render target gets their own retain fRef->fDepthBuffer = depthBuffer->retain(); fRef->SetDirty(false); } - - //if the ref already has an old texture, release it - if(ref->fTexture) + + // if the ref already has an old texture, release it + if (ref->fTexture) ref->fTexture->release(); - if(ref->fDepthBuffer) + if (ref->fDepthBuffer) ref->fDepthBuffer->release(); ref->fTexture = texture; ref->fDepthBuffer = depthBuffer; ref->fOwner = owner; - + // Keep it in a linked list for ready destruction. 
if (owner->GetDeviceRef() != ref) { owner->SetDeviceRef(ref); @@ -492,33 +492,32 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) ref->Link(&fRenderTargetRefList); } ref->SetDirty(false); - + return ref; - } - else if (owner->GetFlags() & plRenderTarget::kIsTexture) { + } else if (owner->GetFlags() & plRenderTarget::kIsTexture) { if (!ref) ref = new plMetalRenderTargetRef(); - - MTL::TextureDescriptor *textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); textureDescriptor->setWidth(owner->GetWidth()); textureDescriptor->setHeight(owner->GetHeight()); textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); textureDescriptor->setStorageMode(MTL::StorageModePrivate); - - plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef(); - MTL::Texture * texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + + plMetalDeviceRef* device = (plMetalDeviceRef*)owner->GetDeviceRef(); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); textureDescriptor->release(); - - //if the ref already has an old texture, release it - if(ref->fTexture) + + // if the ref already has an old texture, release it + if (ref->fTexture) ref->fTexture->release(); - if(ref->fDepthBuffer) + if (ref->fDepthBuffer) ref->fDepthBuffer->release(); ref->fTexture = texture; ref->fDepthBuffer = depthBuffer; ref->fOwner = owner; - + // Keep it in a linked list for ready destruction. if (owner->GetDeviceRef() != ref) { owner->SetDeviceRef(ref); @@ -530,53 +529,53 @@ hsGDeviceRef *plMetalPipeline::MakeRenderTargetRef(plRenderTarget *owner) if (ref != nullptr && !ref->IsLinked()) ref->Link(&fRenderTargetRefList); } - + return ref; } - + // Not a texture either, must be a plain offscreen. 
// Offscreen isn't currently used for anything. else if (owner->GetFlags() & plRenderTarget::kIsOffscreen) { /// Create a blank surface - - if (!ref) - ref = new plMetalRenderTargetRef(); - - MTL::TextureDescriptor *textureDescriptor = MTL::TextureDescriptor::alloc()->init(); - textureDescriptor->setWidth(owner->GetWidth()); - textureDescriptor->setHeight(owner->GetHeight()); - textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); - textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); - textureDescriptor->setStorageMode(MTL::StorageModeManaged); - - plMetalDeviceRef *device = (plMetalDeviceRef *)owner->GetDeviceRef(); - MTL::Texture * texture = fDevice.fMetalDevice->newTexture(textureDescriptor); - textureDescriptor->release(); - - //if the ref already has an old texture, release it - if(ref->fTexture) - ref->fTexture->release(); - if(ref->fDepthBuffer) - ref->fDepthBuffer->release(); - ref->fTexture = texture; - ref->fDepthBuffer = depthBuffer; - ref->fOwner = owner; - - // Keep it in a linked list for ready destruction. 
- if (owner->GetDeviceRef() != ref) { - owner->SetDeviceRef(ref); - // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) - hsRefCnt_SafeUnRef(ref); - if (ref != nullptr && !ref->IsLinked()) - ref->Link(&fRenderTargetRefList); - } else { - if (ref != nullptr && !ref->IsLinked()) - ref->Link(&fRenderTargetRefList); - } - - return ref; + + if (!ref) + ref = new plMetalRenderTargetRef(); + + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::alloc()->init(); + textureDescriptor->setWidth(owner->GetWidth()); + textureDescriptor->setHeight(owner->GetHeight()); + textureDescriptor->setPixelFormat(MTL::PixelFormatBGRA8Unorm); + textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead); + textureDescriptor->setStorageMode(MTL::StorageModeManaged); + + plMetalDeviceRef* device = (plMetalDeviceRef*)owner->GetDeviceRef(); + MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); + textureDescriptor->release(); + + // if the ref already has an old texture, release it + if (ref->fTexture) + ref->fTexture->release(); + if (ref->fDepthBuffer) + ref->fDepthBuffer->release(); + ref->fTexture = texture; + ref->fDepthBuffer = depthBuffer; + ref->fOwner = owner; + + // Keep it in a linked list for ready destruction. + if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); + // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) + hsRefCnt_SafeUnRef(ref); + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nullptr && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } + + return ref; } - + // Keep it in a linked list for ready destruction. 
if (owner->GetDeviceRef() != ref) { owner->SetDeviceRef(ref); @@ -603,9 +602,9 @@ bool plMetalPipeline::BeginRender() fCurrentPool = NS::AutoreleasePool::alloc()->init(); // offset transform RefreshScreenMatrices(); - + fState.Reset(); - + // offset transform RefreshScreenMatrices(); @@ -619,18 +618,18 @@ bool plMetalPipeline::BeginRender() // Render any shadow maps that have been submitted for this frame. IPreprocessShadows(); IPreprocessAvatarTextures(); - - CA::MetalDrawable *drawable = currentDrawableCallback(fDevice.fMetalDevice); - if(!drawable) { + + CA::MetalDrawable* drawable = currentDrawableCallback(fDevice.fMetalDevice); + if (!drawable) { fCurrentPool->release(); return true; } fDevice.CreateNewCommandBuffer(drawable); drawable->release(); - + /// If we have a renderTarget active, use its viewport - //FIXME: New drawables should inherit existing viewport - //fDevice.SetViewport(); + // FIXME: New drawables should inherit existing viewport + // fDevice.SetViewport(); } fRenderCnt++; @@ -645,13 +644,13 @@ bool plMetalPipeline::EndRender() { bool retVal = false; fState.Reset(); - + if (--fInSceneDepth == 0) { fDevice.SubmitCommandBuffer(); - + IClearShadowSlaves(); } - + // Do this last, after we've drawn everything // Just letting go of things we're done with for the frame. 
hsRefCnt_SafeUnRef(fCurrMaterial); @@ -668,22 +667,20 @@ bool plMetalPipeline::EndRender() return retVal; } -void plMetalPipeline::RenderScreenElements() { +void plMetalPipeline::RenderScreenElements() +{ bool reset = false; - if (fView.HasCullProxy()) - { + if (fView.HasCullProxy()) { Draw(fView.GetCullProxy()); } - hsGMatState tHack = PushMaterialOverride(hsGMatState::kMisc, hsGMatState::kMiscWireFrame, false); hsGMatState ambHack = PushMaterialOverride(hsGMatState::kShade, hsGMatState::kShadeWhite, true); plProfile_BeginTiming(PlateMgr); // Plates - if (fPlateMgr) - { + if (fPlateMgr) { fPlateMgr->DrawToDevice(this); reset = true; } @@ -694,16 +691,14 @@ void plMetalPipeline::RenderScreenElements() { plProfile_BeginTiming(DebugText); /// Debug text - if (fDebugTextMgr && plDebugText::Instance().IsEnabled()) - { + if (fDebugTextMgr && plDebugText::Instance().IsEnabled()) { fDebugTextMgr->DrawToDevice(this); reset = true; } plProfile_EndTiming(DebugText); plProfile_BeginTiming(Reset); - if (reset) - { + if (reset) { fView.fXformResetFlags = fView.kResetAll; // Text destroys view transforms } plProfile_EndTiming(Reset); @@ -715,10 +710,10 @@ void plMetalPipeline::Resize(uint32_t width, uint32_t height) { /* Resize had a bunch of notes on the DX version about how it was an old function, replaced by ResetDisplayDevice. I'll implement it for now, but consider moving over to ResetDisplayDevice. - + This function is cheaper than resetting the entire display device though. */ - hsMatrix44 w2c, c2w, proj; + hsMatrix44 w2c, c2w, proj; // Store some states that we *want* to restore back... 
plViewTransform resetTransform = GetViewTransform(); @@ -726,30 +721,27 @@ void plMetalPipeline::Resize(uint32_t width, uint32_t height) // Destroy old IReleaseDeviceObjects(); IReleaseDynDeviceObjects(); - + // Reset width and height - if( width != 0 && height != 0 ) - { + if (width != 0 && height != 0) { // Width and height of zero mean just recreate fOrigWidth = width; fOrigHeight = height; IGetViewTransform().SetScreenSize((uint16_t)(fOrigWidth), (uint16_t)(fOrigHeight)); resetTransform.SetScreenSize((uint16_t)(fOrigWidth), (uint16_t)(fOrigHeight)); - } - else - { + } else { // Just for debug - hsStatusMessage( "Recreating the pipeline...\n" ); + hsStatusMessage("Recreating the pipeline...\n"); } - + ICreateDeviceObjects(); // Restore states SetViewTransform(resetTransform); IProjectionMatrixToDevice(); - + plVirtualCam1::Refresh(); - + ICreateDynDeviceObjects(); /// Broadcast a message letting everyone know that we were recreated and that @@ -758,27 +750,26 @@ void plMetalPipeline::Resize(uint32_t width, uint32_t height) plgDispatch::MsgSend(clean); } - void plMetalPipeline::IReleaseDeviceObjects() { IReleaseDynDeviceObjects(); - + delete fPlateMgr; fPlateMgr = nullptr; } void plMetalPipeline::LoadResources() { - hsStatusMessageF("Begin Device Reload t=%f",hsTimer::GetSeconds()); + hsStatusMessageF("Begin Device Reload t=%f", hsTimer::GetSeconds()); plNetClientApp::StaticDebugMsg("Begin Device Reload"); - - if(fFragFunction == nil) { + + if (fFragFunction == nil) { FindFragFunction(); } if (plMetalPlateManager* pm = static_cast(fPlateMgr)) pm->IReleaseGeometry(); - + IReleaseDynamicBuffers(); IReleaseAvRTPool(); @@ -806,7 +797,7 @@ void plMetalPipeline::LoadResources() plProfile_IncCount(PipeReload, 1); - hsStatusMessageF("End Device Reload t=%f",hsTimer::GetSeconds()); + hsStatusMessageF("End Device Reload t=%f", hsTimer::GetSeconds()); plNetClientApp::StaticDebugMsg("End Device Reload"); } @@ -819,22 +810,21 @@ bool plMetalPipeline::SetGamma(float eR, float 
eG, float eB) tabR[0] = tabG[0] = tabB[0] = 0L; plConst(float) kMinE(0.1f); - if( eR > kMinE ) + if (eR > kMinE) eR = 1.f / eR; else eR = 1.f / kMinE; - if( eG > kMinE ) + if (eG > kMinE) eG = 1.f / eG; else eG = 1.f / kMinE; - if( eB > kMinE ) + if (eB > kMinE) eB = 1.f / eB; else eB = 1.f / kMinE; int i; - for( i = 1; i < 256; i++ ) - { + for (i = 1; i < 256; i++) { float orig = float(i) / 255.f; float gamm; @@ -856,14 +846,14 @@ bool plMetalPipeline::SetGamma(float eR, float eG, float eB) return true; } -bool plMetalPipeline::SetGamma(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t *const tabB) +bool plMetalPipeline::SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) { - //allocate a new buffer every time so we don't cause problems with a running render pass - if(fDevice.fGammaLUTTexture) { + // allocate a new buffer every time so we don't cause problems with a running render pass + if (fDevice.fGammaLUTTexture) { fDevice.fGammaLUTTexture->release(); fDevice.fGammaLUTTexture = nullptr; } - + /* Plasma has multiple types of gamma corrections it can do - and the engine reserves the right to create any color correct LUT. Ugh. 
Load the LUT into a texture as 8 bit @@ -875,129 +865,126 @@ bool plMetalPipeline::SetGamma(const uint16_t *const tabR, const uint16_t *const texDescriptor->setWidth(256); texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); texDescriptor->setArrayLength(3); - + fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); - + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 0, tabR, 256 * sizeof(uint16_t), 0); fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 1, tabG, 256 * sizeof(uint16_t), 0); fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 2, tabB, 256 * sizeof(uint16_t), 0); - + return true; } -bool plMetalPipeline::SetGamma10(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t *const tabB) +bool plMetalPipeline::SetGamma10(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) { - //allocate a new buffer every time so we don't cause problems with a running render pass - if(fDevice.fGammaLUTTexture) { + // allocate a new buffer every time so we don't cause problems with a running render pass + if (fDevice.fGammaLUTTexture) { fDevice.fGammaLUTTexture->release(); fDevice.fGammaLUTTexture = nullptr; } - + /* Loads in a real 10 bit color LUT for fancy displays. This LUT contains way more data - but the shader doesn't care. The shader does an x lookup by normalized co-ordinate - not value. So the width of the texture can vary. 
*/ - + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); texDescriptor->setTextureType(MTL::TextureType1DArray); texDescriptor->setWidth(1024); texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); texDescriptor->setArrayLength(3); - + fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); - + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 0, tabR, 1024 * sizeof(uint16_t), 0); fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 1, tabG, 1024 * sizeof(uint16_t), 0); fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 2, tabB, 1024 * sizeof(uint16_t), 0); - + return true; } -bool plMetalPipeline::CaptureScreen(plMipmap *dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight) +bool plMetalPipeline::CaptureScreen(plMipmap* dest, bool flipVertical, uint16_t desiredWidth, uint16_t desiredHeight) { - //FIXME: Screen capture - //FIXME: Double fix me - wasn't this working? + // FIXME: Screen capture + // FIXME: Double fix me - wasn't this working? 
return false; } -plMipmap *plMetalPipeline::ExtractMipMap(plRenderTarget *targ) +plMipmap* plMetalPipeline::ExtractMipMap(plRenderTarget* targ) { - if( plCubicRenderTarget::ConvertNoRef(targ) ) + if (plCubicRenderTarget::ConvertNoRef(targ)) return nullptr; - if( targ->GetPixelSize() != 32 ) - { + if (targ->GetPixelSize() != 32) { hsAssert(false, "Only RGBA8888 currently implemented"); return nullptr; } - + plMetalRenderTargetRef* ref = (plMetalRenderTargetRef*)targ->GetDeviceRef(); - if( !ref ) + if (!ref) return nullptr; - + const int width = targ->GetWidth(); const int height = targ->GetHeight(); plMipmap* mipMap = new plMipmap(width, height, plMipmap::kARGB32Config, 1); - uint8_t* ptr = (uint8_t*)(ref->fTexture->buffer()->contents()); + uint8_t* ptr = (uint8_t*)(ref->fTexture->buffer()->contents()); const NS::UInteger pitch = ref->fTexture->width() * 4; - + ref->fTexture->getBytes(mipMap->GetAddr32(0, 0), pitch, MTL::Region(0, 0, width, height), 0); const uint32_t blackOpaque = 0xff000000; - int y; - for( y = 0; y < height; y++ ) - { + int y; + for (y = 0; y < height; y++) { uint32_t* destPtr = mipMap->GetAddr32(0, y); uint32_t* srcPtr = (uint32_t*)destPtr; - int x; - for( x = 0; x < width; x++ ) - { + int x; + for (x = 0; x < width; x++) { destPtr[x] = srcPtr[x] | blackOpaque; } ptr += pitch; } - + return mipMap; } -void plMetalPipeline::GetSupportedDisplayModes(std::vector *res, int ColorDepth) +void plMetalPipeline::GetSupportedDisplayModes(std::vector* res, int ColorDepth) { /* There are decisions to make here. - + Modern macOS does not support "display modes." You panel runs at native resolution at all times, and you can over-render or under-render. But you never set the display mode of the panel, or get the display mode of the panel. Most games have a "scale slider." - + Note: There are legacy APIs for display modes for compatibility with older software. In since we're here writing a new renderer, lets do things the right way. 
The display mode APIs also have trouble with density. I.E. a 4k display might be reported as a 2k display if the window manager is running in a higher DPI mode. - + The basic approach should be to render at whatever the resolution of our output surface is. We're mostly doing that now (aspect ratio doesn't adjust.) - + Ideally we should support some sort of scaling/semi dynamic renderbuffer resolution thing. But don't mess with the window servers framebuffer size. macOS has accelerated resolution scaling like consoles do. Use that. */ - + std::vector supported; - plDisplayMode mode; + plDisplayMode mode; mode.Width = 800; mode.Height = 600; mode.ColorDepth = 32; supported.push_back(mode); - + *res = supported; } int plMetalPipeline::GetMaxAnisotropicSamples() { - //Metal always supports 16. There is no device check (as far as I know.) + // Metal always supports 16. There is no device check (as far as I know.) return 16; } int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth) { - //Metal devices may not support the full antialias range - //return the max and we'll work it out later + // Metal devices may not support the full antialias range + // return the max and we'll work it out later if (fDevice.fMetalDevice->supportsTextureSampleCount(8)) { return 8; } @@ -1012,33 +999,32 @@ int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth) void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync) { - //FIXME: Whats this? - //Seems like an entry point for passing in display settings. - + // FIXME: Whats this? + // Seems like an entry point for passing in display settings. 
+ fDevice.SetMaxAnsiotropy(MaxAnisotropicSamples); } -void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vector &visList) +void plMetalPipeline::RenderSpans(plDrawableSpans* ice, const std::vector& visList) { plProfile_BeginTiming(RenderSpan); - hsMatrix44 lastL2W; - size_t i, j; - hsGMaterial* material; + hsMatrix44 lastL2W; + size_t i, j; + hsGMaterial* material; const std::vector& spans = ice->GetSpanArray(); - //plProfile_IncCount(EmptyList, !visList.GetCount()); + // plProfile_IncCount(EmptyList, !visList.GetCount()); /// Set this (*before* we do our TestVisibleWorld stuff...) lastL2W.Reset(); - ISetLocalToWorld(lastL2W, lastL2W); // This is necessary; otherwise, we have to test for - // the first transform set, since this'll be identity - // but the actual device transform won't be (unless - // we do this) - + ISetLocalToWorld(lastL2W, lastL2W); // This is necessary; otherwise, we have to test for + // the first transform set, since this'll be identity + // but the actual device transform won't be (unless + // we do this) /// Loop through our spans, combining them when possible - for (i = 0; i < visList.size(); ) { + for (i = 0; i < visList.size();) { if (GetOverrideMaterial() != nullptr) { material = GetOverrideMaterial(); } else { @@ -1060,7 +1046,7 @@ void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vectorMergeInto(&tempIce); @@ -1079,8 +1065,8 @@ void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vectorIsLinked()) { mRef->Link(&fMatRefList); } - - hsGDeviceRef* vb = ice->GetVertexRef( tempIce.fGroupIdx, tempIce.fVBufferIdx ); + + hsGDeviceRef* vb = ice->GetVertexRef(tempIce.fGroupIdx, tempIce.fVBufferIdx); plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb; // What do we change? 
@@ -1101,12 +1087,12 @@ void plMetalPipeline::RenderSpans(plDrawableSpans *ice, const std::vectorGetIndexRef( tempIce.fGroupIdx, tempIce.fIBufferIdx ), - material, - tempIce.fVStartIdx, tempIce.fVLength, // These are used as our accumulated range - tempIce.fIPackedIdx, tempIce.fILength ); + IRenderBufferSpan(tempIce, + vb, + ice->GetIndexRef(tempIce.fGroupIdx, tempIce.fIBufferIdx), + material, + tempIce.fVStartIdx, tempIce.fVLength, // These are used as our accumulated range + tempIce.fIPackedIdx, tempIce.fILength); } // Restart our search... @@ -1121,24 +1107,23 @@ void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& { if (span.fNumMatrices) { if (span.fNumMatrices <= 2) { - ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal ); + ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal); lastL2W = span.fLocalToWorld; } else { lastL2W.Reset(); - ISetLocalToWorld( lastL2W, lastL2W ); + ISetLocalToWorld(lastL2W, lastL2W); fView.fLocalToWorldLeftHanded = span.fLocalToWorld.GetParity(); } } else if (lastL2W != span.fLocalToWorld) { - ISetLocalToWorld( span.fLocalToWorld, span.fWorldToLocal ); + ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal); lastL2W = span.fLocalToWorld; } else { fView.fLocalToWorldLeftHanded = lastL2W.GetParity(); } - if( span.fNumMatrices == 2 ) - { + if (span.fNumMatrices == 2) { matrix_float4x4 mat; - hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix+1), &mat); + hsMatrix2SIMD(drawable->GetPaletteMatrix(span.fBaseMatrix + 1), &mat); fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), VertexShaderArgumentBlendMatrix1); } @@ -1149,25 +1134,25 @@ void plMetalPipeline::ISetupTransforms(plDrawableSpans* drawable, const plSpan& } void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, - hsGDeviceRef* ib, hsGMaterial* material, - uint32_t vStart, uint32_t vLength, - uint32_t iStart, uint32_t iLength) + hsGDeviceRef* ib, hsGMaterial* material, + 
uint32_t vStart, uint32_t vLength, + uint32_t iStart, uint32_t iLength) { - if(iLength == 0) { + if (iLength == 0) { return; } - + plProfile_BeginTiming(RenderBuff); - plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb; - plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)ib; + plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb; + plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)ib; plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); mRef->CheckMateralRef(); if (!vRef || !vRef->GetBuffer() || !iRef->GetBuffer()) { plProfile_EndTiming(RenderBuff); - hsAssert( false, ST::format("Trying to render a nil buffer pair! (Mat: {})", material->GetKeyName()).c_str() ); + hsAssert(false, ST::format("Trying to render a nil buffer pair! (Mat: {})", material->GetKeyName()).c_str()); return; } @@ -1186,16 +1171,16 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, } } #endif - + // Turn on this spans lights and turn off the rest. 
ISelectLights(&span, mRef); - + #ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string(material->GetKeyName().c_str(), NS::UTF8StringEncoding)); #endif - + /* Vertex Buffer stuff */ - if(!vRef->GetBuffer()) { + if (!vRef->GetBuffer()) { return; } if (fState.fCurrentVertexBuffer != vRef->GetBuffer()) { @@ -1203,27 +1188,25 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, fState.fCurrentVertexBuffer = vRef->GetBuffer(); } fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); - + IPushPiggyBacks(material); hsRefCnt_SafeAssign(fCurrMaterial, material); uint32_t pass; for (pass = 0; pass < mRef->GetNumPasses(); pass++) { - - if ( IHandleMaterialPass(material, pass, &span, vRef) ) { + if (IHandleMaterialPass(material, pass, &span, vRef)) { render.RenderPrims(); } - - //Projection wants to do it's own lighting, push the current lighting state - //so we can keep the same light calculations on the next pass + + // Projection wants to do it's own lighting, push the current lighting state + // so we can keep the same light calculations on the next pass PushCurrentLightSources(); - + plProfile_BeginTiming(SelectProj); - ISelectLights( &span, mRef, true ); + ISelectLights(&span, mRef, true); plProfile_EndTiming(SelectProj); - + // Take care of projections that get applied to each pass. 
- if( fProjEach.size() && !(fView.fRenderState & kRenderNoProjection) ) - { + if (fProjEach.size() && !(fView.fRenderState & kRenderNoProjection)) { #ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::String::string("Render projections", NS::UTF8StringEncoding)); #endif @@ -1232,51 +1215,48 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); #endif } - //Revert the light state back to what we had before projections + // Revert the light state back to what we had before projections PopCurrentLightSources(); - + if (IsDebugFlagSet(plPipeDbg::kFlagNoUpperLayers)) pass = mRef->GetNumPasses(); } - + IPopPiggyBacks(); - + // Render any aux spans associated. - if( span.GetNumAuxSpans() ) { + if (span.GetNumAuxSpans()) { IRenderAuxSpans(span); - - //aux spans will change the current vertex buffer, put ours back + + // aux spans will change the current vertex buffer, put ours back fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); fState.fCurrentVertexBuffer = vRef->GetBuffer(); } - - // Only render projections and shadows if we successfully rendered the span. // j == -1 means we aborted render. - if( pass >= 0 ) - { - //if we had to render aux spans, we probably changed the vertex and index buffer - //reset those + if (pass >= 0) { + // if we had to render aux spans, we probably changed the vertex and index buffer + // reset those fState.fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); - + // Projections that get applied to the frame buffer (after all passes). 
- if( fProjAll.size() && !(fView.fRenderState & kRenderNoProjection) ) { + if (fProjAll.size() && !(fView.fRenderState & kRenderNoProjection)) { fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::MakeConstantString("Render All Projections")); IRenderProjections(render, vRef); fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); } // Handle render of shadows onto geometry. - if( fShadows.size() ) { + if (fShadows.size()) { IRenderShadowsOntoSpan(render, &span, material, vRef); } } - - if ( span.GetNumAuxSpans() || (pass >= 0 && fShadows.size()) ) { + + if (span.GetNumAuxSpans() || (pass >= 0 && fShadows.size())) { } - + #ifdef HS_DEBUGGING fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); #endif @@ -1289,8 +1269,7 @@ void plMetalPipeline::IRenderProjections(const plRenderPrimFunc& render, const p { PushCurrentLightSources(); IDisableLightsForShadow(); - for (plLightInfo* li : fProjAll) - { + for (plLightInfo* li : fProjAll) { IRenderProjection(render, li, vRef); } PopCurrentLightSources(); @@ -1307,9 +1286,9 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI plLayerInterface* proj = li->GetProjection(); CheckTextureRef(proj); plMetalTextureRef* tex = (plMetalTextureRef*)proj->GetTexture()->GetDeviceRef(); - + IScaleLight(0, true); - + fCurrentRenderPassUniforms->ambientSrc = 1.0; fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; @@ -1320,31 +1299,30 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; - - + matrix_float4x4 tXfm; hsMatrix2SIMD(proj->GetTransform(), &tXfm); fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = proj->GetUVWSrc(); - + fCurrNumLayers = 1; // We should have put ZNoZWrite on during 
export, but we didn't. IHandleZMode(hsGMatState::kZNoZWrite); - - //This is a bit weird - in since this isn't a material we need to build a query for the right Metal program ourselves + + // This is a bit weird - in since this isn't a material we need to build a query for the right Metal program ourselves plMetalFragmentShaderDescription description; memset(&description, 0, sizeof(description)); description.numLayers = fCurrNumLayers = 1; - + description.Populate(proj, 0); - //DX sets the color invert when the final color should be inverted. Not sure why! - if( proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor ) { + // DX sets the color invert when the final color should be inverted. Not sure why! + if (proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor) { description.blendModes[0] |= hsGMatState::kBlendInvertColor; } - - plMetalMaterialPassPipelineState materialShaderState(&fDevice, vRef, description); + + plMetalMaterialPassPipelineState materialShaderState(&fDevice, vRef, description); plMetalDevice::plMetalLinkedPipeline* linkedPipeline = materialShaderState.GetRenderPipelineState(); - + fState.fCurrentPipelineState = linkedPipeline->pipelineState; fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(tex->fTexture, 0); @@ -1362,7 +1340,7 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef) { // If this is a bump map pass, forget it, we've already "done" per-pixel lighting. - //if( fLayerState[iPass].fMiscFlags & (hsGMatState::kMiscBumpLayer | hsGMatState::kMiscBumpChans) ) + // if( fLayerState[iPass].fMiscFlags & (hsGMatState::kMiscBumpLayer | hsGMatState::kMiscBumpChans) ) // return; // Push the LayerShadowBase override. 
This sets the blend @@ -1371,11 +1349,10 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM // For each projector: int k; - for( k = 0; k < fProjEach.size(); k++ ) - { + for (k = 0; k < fProjEach.size(); k++) { // Push it's projected texture as a piggyback. - plLightInfo* li = fProjEach[k]; - plMetalMaterialShaderRef *mRef = (plMetalMaterialShaderRef *)material->GetDeviceRef(); + plLightInfo* li = fProjEach[k]; + plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); plLayerInterface* proj = li->GetProjection(); hsAssert(proj, "A projector with no texture to project?"); @@ -1387,8 +1364,8 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM AppendLayerInterface(&layLightBase, false); - IHandleMaterialPass( material, iPass, &span, vRef, false ); - + IHandleMaterialPass(material, iPass, &span, vRef, false); + IScaleLight(0, true); // Do the render with projection. @@ -1398,12 +1375,9 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM // Pop it's projected texture off piggyback IPopProjPiggyBacks(); - } - } - // ICheckAuxBuffers /////////////////////////////////////////////////////////////////////// // The AuxBuffers are associated with drawables for things to be drawn right after that // drawable's contents. 
In particular, see the plDynaDecal, which includes things like @@ -1415,17 +1389,16 @@ bool plMetalPipeline::ICheckAuxBuffers(const plAuxSpan* span) plGBufferGroup* group = span->fGroup; plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)group->GetVertexBufferRef(span->fVBufferIdx); - if( !vRef ) + if (!vRef) return true; plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)group->GetIndexBufferRef(span->fIBufferIdx); - if( !iRef ) + if (!iRef) return true; // If our vertex buffer ref is volatile and the timestamp is off // then it needs to be refilled - if( vRef->Expired(fVtxRefTime) ) - { + if (vRef->Expired(fVtxRefTime)) { IRefreshDynVertices(group, vRef); } @@ -1443,11 +1416,10 @@ void plMetalPipeline::IRenderAuxSpans(const plSpan& span) ISetLocalToWorld(hsMatrix44::IdentityMatrix(), hsMatrix44::IdentityMatrix()); int i; - for( i = 0; i < span.GetNumAuxSpans(); i++ ) + for (i = 0; i < span.GetNumAuxSpans(); i++) IRenderAuxSpan(span, span.GetAuxSpan(i)); ISetLocalToWorld(span.fLocalToWorld, span.fWorldToLocal); - } // IRenderAuxSpan ////////////////////////////////////////////////////////// @@ -1467,110 +1439,106 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) // Set to render from the aux spans buffers. plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)aux->fGroup->GetVertexBufferRef(aux->fVBufferIdx); - if( !vRef ) + if (!vRef) return; plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)aux->fGroup->GetIndexBufferRef(aux->fIBufferIdx); - if( !iRef ) + if (!iRef) return; - // Now just loop through the aux material, rendering in as many passes as it takes. 
- hsGMaterial* material = aux->fMaterial; + hsGMaterial* material = aux->fMaterial; plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); - + if (mRef == nullptr) { mRef = new plMetalMaterialShaderRef(material, this); material->SetDeviceRef(mRef); } - + /* Vertex Buffer stuff */ - if(!vRef->GetBuffer()) { + if (!vRef->GetBuffer()) { return; } fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); fState.fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); - + plRenderTriListFunc render(&fDevice, 0, aux->fVStartIdx, aux->fVLength, aux->fIStartIdx, aux->fILength); - + for (int32_t pass = 0; pass < mRef->GetNumPasses(); pass++) { IHandleMaterialPass(material, pass, &span, vRef); - if( aux->fFlags & plAuxSpan::kOverrideLiteModel ) - { + if (aux->fFlags & plAuxSpan::kOverrideLiteModel) { fCurrentRenderPassUniforms->ambientCol = {1.0f, 1.0f, 1.0f}; - + fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->ambientSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 0.0; fCurrentRenderPassUniforms->specularSrc = 1.0; } - + render.RenderPrims(); } } -bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) +bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, const plSpan* currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) { plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); - - fCurrLayerIdx = mRef->GetPassIndex(pass); - plLayerInterface *lay = material->GetLayer(mRef->GetPassIndex(pass)); + fCurrLayerIdx = mRef->GetPassIndex(pass); + plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); hsGMatState s; s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); - - if( s.fZFlags & hsGMatState::kZIncLayer ) + + if (s.fZFlags & hsGMatState::kZIncLayer) 
ISetLayer(1); else ISetLayer(0); IHandleZMode(s); IHandleBlendMode(s); - + if (s.fMiscFlags & hsGMatState::kMiscTwoSided) { - if(fState.fCurrentCullMode != MTL::CullModeNone) { + if (fState.fCurrentCullMode != MTL::CullModeNone) { fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); - fState.fCurrentCullMode = MTL::CullModeNone; + fState.fCurrentCullMode = MTL::CullModeNone; } } else { ISetCullMode(); } - - //Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. - if(lay->GetVertexShader() && allowShaders) { - + + // Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. + if (lay->GetVertexShader() && allowShaders) { lay = IPushOverBaseLayer(lay); lay = IPushOverAllLayer(lay); - - //pure shader path - plShader *vertexShader = lay->GetVertexShader(); - plShader *fragShader = lay->GetPixelShader(); - + + // pure shader path + plShader* vertexShader = lay->GetVertexShader(); + plShader* fragShader = lay->GetPixelShader(); + fCurrLay = lay; fCurrNumLayers = mRef->fPassLengths[pass]; - + ISetShaders(vRef, s, vertexShader, fragShader); - - //FIXME: Programmable pipeline does not implement the full feature set + + // FIXME: Programmable pipeline does not implement the full feature set /* The programmable pipeline doesn't do things like set the texture transform matrices, In practice, the transforms aren't set and used. Does it matter that the Metal implementation doesn't implemention the full inputs the DX version gets? 
- + If it is implemented, the same checks the DX version does should be also implemented. DX will set texture transforms, but then turn them off in the pipeline and manually manipulate texture co-ords in the shader. - + Texture setting should also _maybe_ be reconciled with the "fixed" pipeline. But the fixed pipeline uses indirect textures mapped to a buffer. That approach could work for the programmable pipeline too, but I'm planning changes to the fixed pipeline and the way it stores textures. So maybe things should be reconciled after that work is done. */ - + for (size_t i = 0; i < material->GetNumLayers(); i++) { plLayerInterface* layer = material->GetLayer(i); if (!layer) { @@ -1578,33 +1546,32 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, } CheckTextureRef(layer); - + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); if (!img) { return false; } - + plMetalTextureRef* texRef = (plMetalTextureRef*)img->GetDeviceRef(); if (!texRef->fTexture) { return false; } - + size_t idOffset = 0; - //Metal doesn't like mixing 2D and cubic textures. If this is a cubic texture, make sure it lands in the right ID range. - if(plCubicRenderTarget::ConvertNoRef( img )) { + // Metal doesn't like mixing 2D and cubic textures. If this is a cubic texture, make sure it lands in the right ID range. + if (plCubicRenderTarget::ConvertNoRef(img)) { idOffset = FragmentShaderArgumentAttributeCubicTextures; } - + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(texRef->fTexture, i + idOffset); - } lay = IPopOverAllLayer(lay); lay = IPopOverBaseLayer(lay); } else { //"Fixed" path - + /* To compute correct lighting we need to add the pushover layers. 
The actual renderer will do it's own add and remove, so remove the @@ -1613,85 +1580,80 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, lay = IPushOverBaseLayer(lay); lay = IPushOverAllLayer(lay); ICalcLighting(mRef, lay, currSpan); - + s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); - + /* If the layer opacity is 0, don't draw it. This prevents it from contributing to the Z buffer. This can happen with some models like the fire marbles in the neighborhood that have some models for physics only, and then can block other rendering in the Z buffer. DX pipeline does this in ILoopOverLayers. */ - if( (s.fBlendFlags & hsGMatState::kBlendAlpha) - &&lay->GetOpacity() <= 0 - &&(fCurrLightingMethod != plSpan::kLiteVtxPreshaded) ) { - - //FIXME: All these popping of layers in the return sections is getting ugly - + if ((s.fBlendFlags & hsGMatState::kBlendAlpha) && lay->GetOpacity() <= 0 && (fCurrLightingMethod != plSpan::kLiteVtxPreshaded)) { + // FIXME: All these popping of layers in the return sections is getting ugly + lay = IPopOverAllLayer(lay); lay = IPopOverBaseLayer(lay); - + return false; } - + if (s.fBlendFlags & hsGMatState::kBlendInvertVtxAlpha) fCurrentRenderPassUniforms->invVtxAlpha = true; else fCurrentRenderPassUniforms->invVtxAlpha = false; - + std::vector& spanLights = currSpan->GetLightList(false); - + size_t numActivePiggyBacks = 0; - if( !(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive) ) - { + if (!(s.fMiscFlags & hsGMatState::kMiscBumpChans) && !(s.fShadeFlags & hsGMatState::kShadeEmissive)) { /// Tack lightmap onto last stage if we have one numActivePiggyBacks = fActivePiggyBacks; - } - + struct plMetalFragmentShaderDescription fragmentShaderDescription; - + lay = IPopOverAllLayer(lay); lay = IPopOverBaseLayer(lay); - - if(numActivePiggyBacks==0 && fOverBaseLayer == nullptr && fOverAllLayer == nullptr) { + + if (numActivePiggyBacks == 0 && fOverBaseLayer == nullptr 
&& fOverAllLayer == nullptr) { mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass); - + fragmentShaderDescription = mRef->GetFragmentShaderDescription(pass); } else { - - //Plasma pulls piggybacks from the rear first, pull the number of active piggybacks - auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks; - auto lastPiggyback = fPiggyBackStack.end(); + // Plasma pulls piggybacks from the rear first, pull the number of active piggybacks + auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks; + auto lastPiggyback = fPiggyBackStack.end(); std::vector subPiggybacks(firstPiggyback, lastPiggyback); - mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fragmentShaderDescription, &subPiggybacks, - [&](plLayerInterface* layer, uint32_t index){ - if(index==0) { + mRef->EncodeArguments( + fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fragmentShaderDescription, &subPiggybacks, + [&](plLayerInterface* layer, uint32_t index) { + if (index == 0) { layer = IPushOverBaseLayer(layer); } layer = IPushOverAllLayer(layer); - + return layer; }, - [&](plLayerInterface* layer, uint32_t index){ + [&](plLayerInterface* layer, uint32_t index) { layer = IPopOverAllLayer(layer); - if(index==0) + if (index == 0) layer = IPopOverBaseLayer(layer); return layer; }); } - - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, fragmentShaderDescription).GetRenderPipelineState(); - const MTL::RenderPipelineState *pipelineState = linkedPipeline->pipelineState; - + + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, fragmentShaderDescription).GetRenderPipelineState(); + const MTL::RenderPipelineState* pipelineState = linkedPipeline->pipelineState; + /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, 
numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); const MTL::RenderPipelineState *pipelineState = pipeline->pipelineState;*/ - if(fState.fCurrentPipelineState != pipelineState) { + if (fState.fCurrentPipelineState != pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); fState.fCurrentPipelineState = pipelineState; } } - + return true; } @@ -1706,52 +1668,40 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial *material, uint32_t pass, void plMetalPipeline::ISetPipeConsts(plShader* shader) { size_t n = shader->GetNumPipeConsts(); - int i; - for( i = 0; i < n; i++ ) - { + int i; + for (i = 0; i < n; i++) { const plPipeConst& pc = shader->GetPipeConst(i); - switch( pc.fType ) - { - case plPipeConst::kFogSet: - { + switch (pc.fType) { + case plPipeConst::kFogSet: { float set[4]; - //FIXME: Fog broken in dynamic pipeline - //IGetVSFogSet(set); - //shader->SetFloat4(pc.fReg, set); - } - break; - case plPipeConst::kLayAmbient: - { + // FIXME: Fog broken in dynamic pipeline + // IGetVSFogSet(set); + // shader->SetFloat4(pc.fReg, set); + } break; + case plPipeConst::kLayAmbient: { hsColorRGBA col = fCurrLay->GetAmbientColor(); shader->SetColor(pc.fReg, col); - } - break; - case plPipeConst::kLayRuntime: - { + } break; + case plPipeConst::kLayRuntime: { hsColorRGBA col = fCurrLay->GetRuntimeColor(); col.a = fCurrLay->GetOpacity(); shader->SetColor(pc.fReg, col); - } - break; - case plPipeConst::kLaySpecular: - { + } break; + case plPipeConst::kLaySpecular: { hsColorRGBA col = fCurrLay->GetSpecularColor(); shader->SetColor(pc.fReg, col); - } - break; - case plPipeConst::kTex3x4_0: - case plPipeConst::kTex3x4_1: - case plPipeConst::kTex3x4_2: - case plPipeConst::kTex3x4_3: - case plPipeConst::kTex3x4_4: - case plPipeConst::kTex3x4_5: - case plPipeConst::kTex3x4_6: - case plPipeConst::kTex3x4_7: - { + } break; + case plPipeConst::kTex3x4_0: + case 
plPipeConst::kTex3x4_1: + case plPipeConst::kTex3x4_2: + case plPipeConst::kTex3x4_3: + case plPipeConst::kTex3x4_4: + case plPipeConst::kTex3x4_5: + case plPipeConst::kTex3x4_6: + case plPipeConst::kTex3x4_7: { int stage = pc.fType - plPipeConst::kTex3x4_0; - if( stage > fCurrNumLayers ) - { + if (stage > fCurrNumLayers) { // Ooops. This is bad, means the shader is expecting more layers than // we actually have (or is just bogus). Assert and quietly continue. hsAssert(false, "Shader asking for higher stage transform than we have"); @@ -1760,21 +1710,18 @@ void plMetalPipeline::ISetPipeConsts(plShader* shader) const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform(); shader->SetMatrix34(pc.fReg, xfm); - } - break; - case plPipeConst::kTex2x4_0: - case plPipeConst::kTex2x4_1: - case plPipeConst::kTex2x4_2: - case plPipeConst::kTex2x4_3: - case plPipeConst::kTex2x4_4: - case plPipeConst::kTex2x4_5: - case plPipeConst::kTex2x4_6: - case plPipeConst::kTex2x4_7: - { + } break; + case plPipeConst::kTex2x4_0: + case plPipeConst::kTex2x4_1: + case plPipeConst::kTex2x4_2: + case plPipeConst::kTex2x4_3: + case plPipeConst::kTex2x4_4: + case plPipeConst::kTex2x4_5: + case plPipeConst::kTex2x4_6: + case plPipeConst::kTex2x4_7: { int stage = pc.fType - plPipeConst::kTex2x4_0; - if( stage > fCurrNumLayers ) - { + if (stage > fCurrNumLayers) { // Ooops. This is bad, means the shader is expecting more layers than // we actually have (or is just bogus). Assert and quietly continue. 
hsAssert(false, "Shader asking for higher stage transform than we have"); @@ -1783,21 +1730,18 @@ void plMetalPipeline::ISetPipeConsts(plShader* shader) const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform(); shader->SetMatrix24(pc.fReg, xfm); - } - break; - case plPipeConst::kTex1x4_0: - case plPipeConst::kTex1x4_1: - case plPipeConst::kTex1x4_2: - case plPipeConst::kTex1x4_3: - case plPipeConst::kTex1x4_4: - case plPipeConst::kTex1x4_5: - case plPipeConst::kTex1x4_6: - case plPipeConst::kTex1x4_7: - { + } break; + case plPipeConst::kTex1x4_0: + case plPipeConst::kTex1x4_1: + case plPipeConst::kTex1x4_2: + case plPipeConst::kTex1x4_3: + case plPipeConst::kTex1x4_4: + case plPipeConst::kTex1x4_5: + case plPipeConst::kTex1x4_6: + case plPipeConst::kTex1x4_7: { int stage = pc.fType - plPipeConst::kTex1x4_0; - if( stage > fCurrNumLayers ) - { + if (stage > fCurrNumLayers) { // Ooops. This is bad, means the shader is expecting more layers than // we actually have (or is just bogus). Assert and quietly continue. 
hsAssert(false, "Shader asking for higher stage transform than we have"); @@ -1806,114 +1750,93 @@ void plMetalPipeline::ISetPipeConsts(plShader* shader) const hsMatrix44& xfm = fCurrMaterial->GetLayer(fCurrLayerIdx + stage)->GetTransform(); shader->SetFloat4(pc.fReg, xfm.fMap[0]); - } - break; - case plPipeConst::kLocalToNDC: - { + } break; + case plPipeConst::kLocalToNDC: { hsMatrix44 cam2ndc = IGetCameraToNDC(); hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); hsMatrix44 local2ndc = cam2ndc * world2cam * GetLocalToWorld(); shader->SetMatrix44(pc.fReg, local2ndc); - } - break; + } break; - case plPipeConst::kCameraToNDC: - { + case plPipeConst::kCameraToNDC: { hsMatrix44 cam2ndc = IGetCameraToNDC(); shader->SetMatrix44(pc.fReg, cam2ndc); - } - break; + } break; - case plPipeConst::kWorldToNDC: - { + case plPipeConst::kWorldToNDC: { hsMatrix44 cam2ndc = IGetCameraToNDC(); hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); hsMatrix44 world2ndc = cam2ndc * world2cam; shader->SetMatrix44(pc.fReg, world2ndc); - } - break; + } break; - case plPipeConst::kLocalToWorld: - shader->SetMatrix34(pc.fReg, GetLocalToWorld()); - break; + case plPipeConst::kLocalToWorld: + shader->SetMatrix34(pc.fReg, GetLocalToWorld()); + break; - case plPipeConst::kWorldToLocal: - shader->SetMatrix34(pc.fReg, GetWorldToLocal()); - break; + case plPipeConst::kWorldToLocal: + shader->SetMatrix34(pc.fReg, GetWorldToLocal()); + break; - case plPipeConst::kWorldToCamera: - { + case plPipeConst::kWorldToCamera: { hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); shader->SetMatrix34(pc.fReg, world2cam); - } - break; + } break; - case plPipeConst::kCameraToWorld: - { + case plPipeConst::kCameraToWorld: { hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld(); shader->SetMatrix34(pc.fReg, cam2world); - } - break; + } break; - case plPipeConst::kLocalToCamera: - { + case plPipeConst::kLocalToCamera: { hsMatrix44 world2cam = GetViewTransform().GetWorldToCamera(); 
hsMatrix44 local2cam = world2cam * GetLocalToWorld(); shader->SetMatrix34(pc.fReg, local2cam); - } - break; + } break; - case plPipeConst::kCameraToLocal: - { + case plPipeConst::kCameraToLocal: { hsMatrix44 cam2world = GetViewTransform().GetCameraToWorld(); hsMatrix44 cam2local = GetWorldToLocal() * cam2world; shader->SetMatrix34(pc.fReg, cam2local); - } - break; + } break; - case plPipeConst::kCamPosWorld: - { + case plPipeConst::kCamPosWorld: { shader->SetVectorW(pc.fReg, GetViewTransform().GetCameraToWorld().GetTranslate(), 1.f); - } - break; + } break; - case plPipeConst::kCamPosLocal: - { + case plPipeConst::kCamPosLocal: { hsPoint3 localCam = GetWorldToLocal() * GetViewTransform().GetCameraToWorld().GetTranslate(); shader->SetVectorW(pc.fReg, localCam, 1.f); - } - break; + } break; - case plPipeConst::kObjPosWorld: - { + case plPipeConst::kObjPosWorld: { shader->SetVectorW(pc.fReg, GetLocalToWorld().GetTranslate(), 1.f); - } - break; - - // UNIMPLEMENTED - case plPipeConst::kDirLight1: - case plPipeConst::kDirLight2: - case plPipeConst::kDirLight3: - case plPipeConst::kDirLight4: - case plPipeConst::kPointLight1: - case plPipeConst::kPointLight2: - case plPipeConst::kPointLight3: - case plPipeConst::kPointLight4: - case plPipeConst::kColorFilter: - case plPipeConst::kMaxType: - break; + } break; + + // UNIMPLEMENTED + case plPipeConst::kDirLight1: + case plPipeConst::kDirLight2: + case plPipeConst::kDirLight3: + case plPipeConst::kDirLight4: + case plPipeConst::kPointLight1: + case plPipeConst::kPointLight2: + case plPipeConst::kPointLight3: + case plPipeConst::kPointLight4: + case plPipeConst::kColorFilter: + case plPipeConst::kMaxType: + break; } } } @@ -1923,51 +1846,47 @@ void plMetalPipeline::ISetPipeConsts(plShader* shader) // be nil, in which case the fixed function pipeline is indicated. // Any Pipe Constants the non-FFP shader wants will be set here. // Lastly, all constants will be set (as a block) for any non-FFP vertex or pixel shader. 
-bool plMetalPipeline::ISetShaders(const plMetalVertexBufferRef * vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader) +bool plMetalPipeline::ISetShaders(const plMetalVertexBufferRef* vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader) { hsAssert(vShader, "Can't handle programmable passes without vShader"); hsAssert(pShader, "Can't handle programmable passes without pShader"); plShaderID::ID vertexShaderID = vShader->GetDecl()->GetID(); plShaderID::ID fragmentShaderID = pShader->GetDecl()->GetID(); - - plMetalDevice::plMetalLinkedPipeline *pipeline = plMetalDynamicMaterialPipelineState(&fDevice, vRef, blendMode.fBlendFlags, vertexShaderID, fragmentShaderID).GetRenderPipelineState(); - if(fState.fCurrentPipelineState != pipeline->pipelineState) { + + plMetalDevice::plMetalLinkedPipeline* pipeline = plMetalDynamicMaterialPipelineState(&fDevice, vRef, blendMode.fBlendFlags, vertexShaderID, fragmentShaderID).GetRenderPipelineState(); + if (fState.fCurrentPipelineState != pipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipeline->pipelineState); fState.fCurrentPipelineState = pipeline->pipelineState; } - - if( vShader ) - { + + if (vShader) { hsAssert(vShader->IsVertexShader(), "Wrong type shader as vertex shader"); ISetPipeConsts(vShader); - + plMetalVertexShader* vRef = (plMetalVertexShader*)vShader->GetDeviceRef(); - if( !vRef ) - { + if (!vRef) { vRef = new plMetalVertexShader(vShader); hsRefCnt_SafeUnRef(vRef); } - if( !vRef->IsLinked() ) + if (!vRef->IsLinked()) vRef->Link(&fVShaderRefList); - + vRef->ISetConstants(this); } - if( pShader ) - { + if (pShader) { hsAssert(pShader->IsPixelShader(), "Wrong type shader as pixel shader"); ISetPipeConsts(pShader); - + plMetalFragmentShader* pRef = (plMetalFragmentShader*)pShader->GetDeviceRef(); - if( !pRef ) - { + if (!pRef) { pRef = new plMetalFragmentShader(pShader); hsRefCnt_SafeUnRef(pRef); } - if( !pRef->IsLinked() ) + if 
(!pRef->IsLinked()) pRef->Link(&fPShaderRefList); - + pRef->ISetConstants(this); } @@ -2010,12 +1929,12 @@ bool plMetalPipeline::ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup // If our vertex buffer ref is volatile and the timestamp is off // then it needs to be refilled - //MTL::PurgeableState bufferState = vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateNonVolatile); + // MTL::PurgeableState bufferState = vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateNonVolatile); if (vRef->Expired(fVtxRefTime)) { IRefreshDynVertices(group, vRef); - //fDevice.GetCurrentCommandBuffer()->addCompletedHandler( ^(MTL::CommandBuffer *buffer) { - //vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); - //}); + // fDevice.GetCurrentCommandBuffer()->addCompletedHandler( ^(MTL::CommandBuffer *buffer) { + // vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); + // }); } if (iRef->IsDirty()) { @@ -2034,8 +1953,7 @@ bool plMetalPipeline::IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBu hsAssert(size > 0, "Bad start and end counts in a group"); - if (!vRef->GetBuffer()) - { + if (!vRef->GetBuffer()) { hsAssert(size > 0, "Being asked to fill a buffer that doesn't exist yet?"); } @@ -2044,16 +1962,16 @@ bool plMetalPipeline::IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBu vData = vRef->fData; else vData = group->GetVertBufferData(vRef->fIndex) + group->GetVertBufferStart(vRef->fIndex) * vRef->fVertexSize; - + vRef->PrepareForWrite(); MTL::Buffer* vertexBuffer = vRef->GetBuffer(); - if(!vertexBuffer || vertexBuffer->length() < size) { - //Plasma will present different length buffers at different times + if (!vertexBuffer || vertexBuffer->length() < size) { + // Plasma will present different length buffers at different times vertexBuffer = fDevice.fMetalDevice->newBuffer(vData, size, MTL::ResourceStorageModeManaged)->autorelease(); - if(vRef->Volatile()) { + if (vRef->Volatile()) { 
fDevice.GetCurrentCommandBuffer()->addCompletedHandler(^(MTL::CommandBuffer* buffer){ - //vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); + // vRef->fVertexBuffer->setPurgeableState(MTL::PurgeableStateVolatile); }); } vRef->SetBuffer(vertexBuffer); @@ -2072,15 +1990,14 @@ bool plMetalPipeline::IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBu void plMetalPipeline::IHandleZMode(hsGMatState flags) { - //Metal is very particular that if there is no depth buffer we need to explictly disable z read and write - if(fDevice.fCurrentDepthFormat == MTL::PixelFormatInvalid) { + // Metal is very particular that if there is no depth buffer we need to explictly disable z read and write + if (fDevice.fCurrentDepthFormat == MTL::PixelFormatInvalid) { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); return; } - - MTL::DepthStencilState *newDepthState; - switch (flags.fZFlags & hsGMatState::kZMask) - { + + MTL::DepthStencilState* newDepthState; + switch (flags.fZFlags & hsGMatState::kZMask) { case hsGMatState::kZClearZ: fDevice.Clear(false, {0.0f, 0.0f, 0.0f, 0.0f}, true, 0.0); break; @@ -2104,8 +2021,8 @@ void plMetalPipeline::IHandleZMode(hsGMatState flags) hsAssert(false, "Illegal combination of Z Buffer modes (Clear but don't write)"); break; } - - if(fState.fCurrentDepthStencilState != newDepthState) { + + if (fState.fCurrentDepthStencilState != newDepthState) { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(newDepthState); fState.fCurrentDepthStencilState = newDepthState; } @@ -2114,12 +2031,10 @@ void plMetalPipeline::IHandleZMode(hsGMatState flags) //// ISetLayer //////////////////////////////////////////////////////////////// // Sets whether we're rendering a base layer or upper layer. Upper layer has // a Z bias to avoid Z fighting. 
-void plMetalPipeline::ISetLayer( uint32_t lay ) +void plMetalPipeline::ISetLayer(uint32_t lay) { - if( lay ) - { - if( fCurrRenderLayer != lay ) - { + if (lay) { + if (fCurrRenderLayer != lay) { fCurrRenderLayer = lay; plCONST(int) kBiasMult = 8; @@ -2128,9 +2043,7 @@ void plMetalPipeline::ISetLayer( uint32_t lay ) static float max [[gnu::used]] = -0.00001; fDevice.CurrentRenderCommandEncoder()->setDepthBias(constBias, mult, max); } - } - else if( fCurrRenderLayer != 0 ) - { + } else if (fCurrRenderLayer != 0) { fCurrRenderLayer = 0; fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, 0.0, 0.0); } @@ -2140,26 +2053,25 @@ void plMetalPipeline::IHandleBlendMode(hsGMatState flags) { // No color, just writing out Z values. if (flags.fBlendFlags & hsGMatState::kBlendNoColor) { - //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); + // printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); flags.fBlendFlags |= 0x80000000; } else { - switch (flags.fBlendFlags & hsGMatState::kBlendMask) - { + switch (flags.fBlendFlags & hsGMatState::kBlendMask) { // Detail is just a special case of alpha, handled in construction of the texture // mip chain by making higher levels of the chain more transparent. 
case hsGMatState::kBlendDetail: case hsGMatState::kBlendAlpha: if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalAlpha) { if (flags.fBlendFlags & hsGMatState::kBlendAlphaPremultiplied) { - //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); } else { - //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); } } else { if (flags.fBlendFlags & hsGMatState::kBlendAlphaPremultiplied) { - //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); } else { - //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); } } break; @@ -2167,75 +2079,68 @@ void plMetalPipeline::IHandleBlendMode(hsGMatState flags) // Multiply the final color onto the frame buffer. case hsGMatState::kBlendMult: if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalColor) { - //printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); + // printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); } else { - //printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); + // printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); } break; // Add final color to FB. case hsGMatState::kBlendAdd: - //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); + // printf("glBlendFunc(GL_ONE, GL_ONE);\n"); break; // Multiply final color by FB color and add it into the FB. case hsGMatState::kBlendMADD: - //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); + // printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); break; // Final color times final alpha, added into the FB. 
case hsGMatState::kBlendAddColorTimesAlpha: if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalAlpha) { - //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); + // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); } else { - //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); + // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); } break; // Overwrite final color onto FB case 0: - //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + // printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); break; - default: - { - hsAssert(false, "Too many blend modes specified in material"); - plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); - if( lay ) - { - if( lay->GetBlendFlags() & hsGMatState::kBlendAlpha ) - { - lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha); - } - else - { - lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); - } + default: { + hsAssert(false, "Too many blend modes specified in material"); + plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); + if (lay) { + if (lay->GetBlendFlags() & hsGMatState::kBlendAlpha) { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAlpha); + } else { + lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); } - //layer state needs to be syncronized to the GPU - static_cast(fCurrMaterial->GetDeviceRef())->SetDirty(true); } - break; + // layer state needs to be syncronized to the GPU + static_cast(fCurrMaterial->GetDeviceRef())->SetDirty(true); + } break; } } } void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan) { - //plProfile_Inc(MatLightState); + // plProfile_Inc(MatLightState); - if (IsDebugFlagSet(plPipeDbg::kFlagAllBright)) - { - fCurrentRenderPassUniforms->globalAmb = { 1.0, 1.0, 1.0, 1.0 }; + 
if (IsDebugFlagSet(plPipeDbg::kFlagAllBright)) { + fCurrentRenderPassUniforms->globalAmb = {1.0, 1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->diffuseCol = { 1.0, 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->emissiveCol = { 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->specularCol = { 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->emissiveCol = {1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->emissiveCol = {1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->specularCol = {1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->ambientSrc = 1.0; - fCurrentRenderPassUniforms->diffuseSrc = 1.0; + fCurrentRenderPassUniforms->ambientSrc = 1.0; + fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; fCurrentRenderPassUniforms->specularSrc = 1.0; @@ -2254,40 +2159,40 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye /// Select one of our three lighting methods switch (mode) { - case plSpan::kLiteMaterial: // Material shading + case plSpan::kLiteMaterial: // Material shading { if (state.fShadeFlags & hsGMatState::kShadeWhite) { - fCurrentRenderPassUniforms->globalAmb = { 1.0, 1.0, 1.0, 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { 1.0, 1.0, 1.0 }; + fCurrentRenderPassUniforms->globalAmb = {1.0, 1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0}; } else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade)) { - fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0, 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->globalAmb = {0.0, 0.0, 0.0, 1.0}; + fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; } else { hsColorRGBA amb = currLayer->GetPreshadeColor(); - 
fCurrentRenderPassUniforms->globalAmb = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0 }; - fCurrentRenderPassUniforms->ambientCol = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b) }; + fCurrentRenderPassUniforms->globalAmb = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0}; + fCurrentRenderPassUniforms->ambientCol = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b)}; } hsColorRGBA dif = currLayer->GetRuntimeColor(); - fCurrentRenderPassUniforms->diffuseCol = { static_cast(dif.r), static_cast(dif.g), static_cast(dif.b), static_cast(currLayer->GetOpacity()) }; + fCurrentRenderPassUniforms->diffuseCol = {static_cast(dif.r), static_cast(dif.g), static_cast(dif.b), static_cast(currLayer->GetOpacity())}; hsColorRGBA em = currLayer->GetAmbientColor(); - fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b) }; + fCurrentRenderPassUniforms->emissiveCol = {static_cast(em.r), static_cast(em.g), static_cast(em.b)}; // Set specular properties if (state.fShadeFlags & hsGMatState::kShadeSpecular) { hsColorRGBA spec = currLayer->GetSpecularColor(); - fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b) }; + fCurrentRenderPassUniforms->specularCol = {static_cast(spec.r), static_cast(spec.g), static_cast(spec.b)}; #if 0 mat.Power = currLayer->GetSpecularPower(); #endif } else { - fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; } fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; - fCurrentRenderPassUniforms -> specularSrc = 1.0; + fCurrentRenderPassUniforms->specularSrc = 1.0; if (state.fShadeFlags & hsGMatState::kShadeNoShade) { fCurrentRenderPassUniforms->ambientSrc = 1.0; @@ -2299,13 +2204,13 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye break; } - case 
plSpan::kLiteVtxPreshaded: // Vtx preshaded + case plSpan::kLiteVtxPreshaded: // Vtx preshaded { - fCurrentRenderPassUniforms->globalAmb = { 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->emissiveCol = { 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->globalAmb = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->diffuseCol = {0.0, 0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->emissiveCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; fCurrentRenderPassUniforms->diffuseSrc = 0.0; fCurrentRenderPassUniforms->ambientSrc = 1.0; @@ -2316,87 +2221,83 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye } else { fCurrentRenderPassUniforms->emissiveSrc = 1.0; } - + fCurrLightingMethod = plSpan::kLiteVtxPreshaded; break; } - case plSpan::kLiteVtxNonPreshaded: // Vtx non-preshaded + case plSpan::kLiteVtxNonPreshaded: // Vtx non-preshaded { - fCurrentRenderPassUniforms->ambientCol = { 0.0, 0.0, 0.0 }; - fCurrentRenderPassUniforms->diffuseCol = { 0.0, 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->diffuseCol = {0.0, 0.0, 0.0, 0.0}; hsColorRGBA em = currLayer->GetAmbientColor(); - fCurrentRenderPassUniforms->emissiveCol = { static_cast(em.r), static_cast(em.g), static_cast(em.b) }; + fCurrentRenderPassUniforms->emissiveCol = {static_cast(em.r), static_cast(em.g), static_cast(em.b)}; // Set specular properties if (state.fShadeFlags & hsGMatState::kShadeSpecular) { hsColorRGBA spec = currLayer->GetSpecularColor(); - fCurrentRenderPassUniforms->specularCol = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b) }; + fCurrentRenderPassUniforms->specularCol = {static_cast(spec.r), static_cast(spec.g), 
static_cast(spec.b)}; #if 0 mat.Power = currLayer->GetSpecularPower(); #endif } else { - fCurrentRenderPassUniforms->specularCol = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; } hsColorRGBA amb = currLayer->GetPreshadeColor(); - fCurrentRenderPassUniforms->globalAmb = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; + fCurrentRenderPassUniforms->globalAmb = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a)}; fCurrentRenderPassUniforms->ambientSrc = 0.0; fCurrentRenderPassUniforms->diffuseSrc = 0.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; fCurrentRenderPassUniforms->specularSrc = 1.0; - + fCurrLightingMethod = plSpan::kLiteVtxNonPreshaded; break; } } // Piggy-back some temporary fog stuff on the lighting... const plFogEnvironment* fog = (currSpan ? (currSpan->fFogEnvironment ? currSpan->fFogEnvironment : &fView.GetDefaultFog()) : nullptr); - - if (currLayer) - { + + if (currLayer) { if ((currLayer->GetShadeFlags() & hsGMatState::kShadeReallyNoFog) && !(fMatOverOff.fShadeFlags & hsGMatState::kShadeReallyNoFog)) fog = nil; } - - uint8_t type = fog ? fog->GetType() : plFogEnvironment::kNoFog; + + uint8_t type = fog ? fog->GetType() : plFogEnvironment::kNoFog; hsColorRGBA color; switch (type) { - case plFogEnvironment::kLinearFog: - { + case plFogEnvironment::kLinearFog: { float start, end; fog->GetPipelineParams(&start, &end, &color); fCurrentRenderPassUniforms->fogExponential = 0; fCurrentRenderPassUniforms->fogValues = {start, end}; - fCurrentRenderPassUniforms->fogColor = { static_cast(color.r), static_cast(color.g), static_cast(color.b) }; + fCurrentRenderPassUniforms->fogColor = {static_cast(color.r), static_cast(color.g), static_cast(color.b)}; break; } case plFogEnvironment::kExpFog: - case plFogEnvironment::kExp2Fog: - { + case plFogEnvironment::kExp2Fog: { float density; float power = (type == plFogEnvironment::kExp2Fog) ? 
2.0f : 1.0f; fog->GetPipelineParams(&density, &color); fCurrentRenderPassUniforms->fogExponential = 1; - fCurrentRenderPassUniforms->fogValues = { power, density}; - fCurrentRenderPassUniforms->fogColor = { static_cast(color.r), static_cast(color.g), static_cast(color.b) }; + fCurrentRenderPassUniforms->fogValues = {power, density}; + fCurrentRenderPassUniforms->fogColor = {static_cast(color.r), static_cast(color.g), static_cast(color.b)}; break; } default: fCurrentRenderPassUniforms->fogExponential = 0; - fCurrentRenderPassUniforms->fogValues = { 0.0, 0.0 }; - fCurrentRenderPassUniforms->fogColor = { 0.0, 0.0, 0.0 }; + fCurrentRenderPassUniforms->fogValues = {0.0, 0.0}; + fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; break; } - - - if( currLayer->GetBlendFlags() & (hsGMatState::kBlendAdd | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha) ) { - fCurrentRenderPassUniforms->fogColor = { 0.0, 0.0, 0.0 }; + + if (currLayer->GetBlendFlags() & (hsGMatState::kBlendAdd | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha)) { + fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; } } @@ -2406,26 +2307,24 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye // strongest N changes membership. 
void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj) { - const size_t numLights = kMetalMaxLightCount; - int32_t i = 0; - int32_t startScale; - float threshhold; - float overHold = 0.3; - float scale; - static std::vector onLights; + const size_t numLights = kMetalMaxLightCount; + int32_t i = 0; + int32_t startScale; + float threshhold; + float overHold = 0.3; + float scale; + static std::vector onLights; onLights.clear(); - if (!IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights) && + if (!IsDebugFlagSet(plPipeDbg::kFlagNoRuntimeLights) && !(IsDebugFlagSet(plPipeDbg::kFlagNoApplyProjLights) && proj) && - !(IsDebugFlagSet(plPipeDbg::kFlagOnlyApplyProjLights) && !proj)) - { + !(IsDebugFlagSet(plPipeDbg::kFlagOnlyApplyProjLights) && !proj)) { std::vector& spanLights = span->GetLightList(proj); fLights.count = 0; for (i = 0; i < spanLights.size() && i < numLights; i++) { // If these are non-projected lights, go ahead and enable them. - if( !proj ) - { + if (!proj) { IEnableLight(fLights.count, spanLights[i]); fLights.count++; } @@ -2459,15 +2358,13 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef IScaleLight(i, span->GetLightScale(i, proj)); } } - + // For the projected lights, don't enable, just remember who they are. 
- if( proj ) - { + if (proj) { fProjAll.clear(); fProjEach.clear(); - for( i = 0; i < onLights.size(); i++ ) - { - if( onLights[i]->OverAll() ) + for (i = 0; i < onLights.size(); i++) { + if (onLights[i]->OverAll()) fProjAll.emplace_back(onLights[i]); else fProjEach.emplace_back(onLights[i]); @@ -2479,67 +2376,63 @@ void plMetalPipeline::ISelectLights(const plSpan* span, plMetalMaterialShaderRef void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) { hsColorRGBA amb = light->GetAmbient(); - fLights.lampSources[i].ambient = { static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a) }; + fLights.lampSources[i].ambient = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a)}; hsColorRGBA diff = light->GetDiffuse(); - fLights.lampSources[i].diffuse = { static_cast(diff.r), static_cast(diff.g), static_cast(diff.b), static_cast(diff.a) }; + fLights.lampSources[i].diffuse = {static_cast(diff.r), static_cast(diff.g), static_cast(diff.b), static_cast(diff.a)}; hsColorRGBA spec = light->GetSpecular(); - fLights.lampSources[i].specular = { static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), static_cast(spec.a) }; + fLights.lampSources[i].specular = {static_cast(spec.r), static_cast(spec.g), static_cast(spec.b), static_cast(spec.a)}; plDirectionalLightInfo* dirLight = nullptr; - plOmniLightInfo* omniLight = nullptr; - plSpotLightInfo* spotLight = nullptr; + plOmniLightInfo* omniLight = nullptr; + plSpotLightInfo* spotLight = nullptr; - if ((dirLight = plDirectionalLightInfo::ConvertNoRef(light)) != nullptr) - { + if ((dirLight = plDirectionalLightInfo::ConvertNoRef(light)) != nullptr) { hsVector3 lightDir = dirLight->GetWorldDirection(); - fLights.lampSources[i].position = { lightDir.fX, lightDir.fY, lightDir.fZ, 0.0 }; - fLights.lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; + fLights.lampSources[i].position = {lightDir.fX, lightDir.fY, lightDir.fZ, 0.0}; + 
fLights.lampSources[i].direction = {lightDir.fX, lightDir.fY, lightDir.fZ}; fLights.lampSources[i].constAtten = 1.0f; fLights.lampSources[i].linAtten = 0.0f; fLights.lampSources[i].quadAtten = 0.0f; - } - else if ((omniLight = plOmniLightInfo::ConvertNoRef(light)) != nullptr) - { + } else if ((omniLight = plOmniLightInfo::ConvertNoRef(light)) != nullptr) { hsPoint3 pos = omniLight->GetWorldPosition(); - fLights.lampSources[i].position = { pos.fX, pos.fY, pos.fZ, 1.0 }; + fLights.lampSources[i].position = {pos.fX, pos.fY, pos.fZ, 1.0}; // TODO: Maximum Range - + fLights.lampSources[i].constAtten = omniLight->GetConstantAttenuation(); fLights.lampSources[i].linAtten = omniLight->GetLinearAttenuation(); fLights.lampSources[i].quadAtten = omniLight->GetQuadraticAttenuation(); if (!omniLight->GetProjection() && (spotLight = plSpotLightInfo::ConvertNoRef(omniLight)) != nullptr) { hsVector3 lightDir = spotLight->GetWorldDirection(); - fLights.lampSources[i].direction = { lightDir.fX, lightDir.fY, lightDir.fZ }; + fLights.lampSources[i].direction = {lightDir.fX, lightDir.fY, lightDir.fZ}; float falloff = spotLight->GetFalloff(); float gamma = cosf(spotLight->GetSpotInner()); float phi = cosf(spotLight->GetProjection() ? 
hsConstants::half_pi : spotLight->GetSpotOuter()); - fLights.lampSources[i].spotProps = { falloff, gamma, phi }; + fLights.lampSources[i].spotProps = {falloff, gamma, phi}; } else { - fLights.lampSources[i].spotProps = { 0.0, 0.0, 0.0 }; + fLights.lampSources[i].spotProps = {0.0, 0.0, 0.0}; } - } - else { + } else { IDisableLight(i); } } void plMetalPipeline::IDisableLight(size_t i) { - fLights.lampSources[i].position = { 0.0f, 0.0f, 0.0f, 0.0f }; - fLights.lampSources[i].ambient = { 0.0f, 0.0f, 0.0f, 0.0f }; - fLights.lampSources[i].diffuse = { 0.0f, 0.0f, 0.0f, 0.0f }; - fLights.lampSources[i].specular = { 0.0f, 0.0f, 0.0f, 0.0f }; - fLights.lampSources[i].constAtten = { 1.0f }; - fLights.lampSources[i].linAtten = { 0.0f }; - fLights.lampSources[i].quadAtten = { 0.0f }; - fLights.lampSources[i].scale = { 0.0f }; + fLights.lampSources[i].position = {0.0f, 0.0f, 0.0f, 0.0f}; + fLights.lampSources[i].ambient = {0.0f, 0.0f, 0.0f, 0.0f}; + fLights.lampSources[i].diffuse = {0.0f, 0.0f, 0.0f, 0.0f}; + fLights.lampSources[i].specular = {0.0f, 0.0f, 0.0f, 0.0f}; + fLights.lampSources[i].constAtten = {1.0f}; + fLights.lampSources[i].linAtten = {0.0f}; + fLights.lampSources[i].quadAtten = {0.0f}; + fLights.lampSources[i].scale = {0.0f}; } void plMetalPipeline::IScaleLight(size_t i, float scale) @@ -2550,20 +2443,20 @@ void plMetalPipeline::IScaleLight(size_t i, float scale) void plMetalPipeline::IDrawPlate(plPlate* plate) { - if(!plate->IsVisible()) { + if (!plate->IsVisible()) { return; } hsGMaterial* material = plate->GetMaterial(); - + plLayerInterface* lay = material->GetLayer(0); - hsGMatState s; + hsGMatState s; s.Composite(lay->GetState(), fMatOverOn, fMatOverOff); IHandleZMode(s); IHandleBlendMode(s); fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZReadOrWriteStencilState); fState.fCurrentDepthStencilState = fDevice.fNoZReadOrWriteStencilState; - + simd_float4x4 projMat = matrix_identity_float4x4; /// Set up the transform directly @@ -2582,15 
+2475,15 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) if (!mRef->IsLinked()) { mRef->Link(&fMatRefList); } - + fDevice.SetLocalToWorldMatrix(plate->GetTransform()); - - plMetalPlateManager *pm = (plMetalPlateManager *)fPlateMgr; - - plMetalPlatePipelineState state(&fDevice); + + plMetalPlateManager* pm = (plMetalPlateManager*)fPlateMgr; + + plMetalPlatePipelineState state(&fDevice); plMetalDevice::plMetalLinkedPipeline* linkedPipeline = state.GetRenderPipelineState(); - - if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { + + if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fState.fCurrentPipelineState = linkedPipeline->pipelineState; } @@ -2598,43 +2491,42 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&alpha, sizeof(float), 6); fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState); fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); - - int uniformSize = sizeof(VertexUniforms); + + int uniformSize = sizeof(VertexUniforms); VertexUniforms uniforms; uniforms.projectionMatrix = projMat; matrix_float4x4 modelMatrix; uniforms.worldToCameraMatrix = modelMatrix; uniforms.uvTransforms[0].UVWSrc = 0; - //uniforms.worldToLocalMatrix = fDevice.fMatrixW2L; - - //flip world to camera, it's upside down + // uniforms.worldToLocalMatrix = fDevice.fMatrixW2L; + + // flip world to camera, it's upside down matrix_float4x4 flip = matrix_identity_float4x4; flip.columns[1][1] = -1.0f; - - - //uniforms.worldToCameraMatrix = - //uniforms.cameraToWorldMatrix = fDevice.fMatrixC2W; + + // uniforms.worldToCameraMatrix = + // uniforms.cameraToWorldMatrix = fDevice.fMatrixC2W; uniforms.localToWorldMatrix = matrix_multiply(flip, fDevice.fMatrixL2W); - + mRef->FastEncodeArguments(fDevice.CurrentRenderCommandEncoder(), &uniforms, 0); - //FIXME: Hacking the 
old texture drawing into the plate path + // FIXME: Hacking the old texture drawing into the plate path mRef->prepareTextures(fDevice.CurrentRenderCommandEncoder(), 0); - - fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); - + + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&uniforms, sizeof(VertexUniforms), VertexShaderArgumentFixedFunctionUniforms); + pm->EncodeDraw(fDevice.CurrentRenderCommandEncoder()); - + IPopPiggyBacks(); } -//Push and pop light sources -//The DX version would just keep a giant pool of lights -//that could be claimed by different parts of the pipeline. -//In Metal, when a part of the pipeline wants to own lights -//we'll just let them push/pop the current state. +// Push and pop light sources +// The DX version would just keep a giant pool of lights +// that could be claimed by different parts of the pipeline. +// In Metal, when a part of the pipeline wants to own lights +// we'll just let them push/pop the current state. void plMetalPipeline::PushCurrentLightSources() { - plMetalLights *lightSources = new plMetalLights(); + plMetalLights* lightSources = new plMetalLights(); memcpy(lightSources, &fLights, sizeof(plMetalLights)); fLightSourceStack.emplace_back(lightSources); } @@ -2642,7 +2534,7 @@ void plMetalPipeline::PushCurrentLightSources() void plMetalPipeline::PopCurrentLightSources() { hsAssert(fLightSourceStack.size() > 0, "Asked to pop light sources but none on stack"); - plMetalLights *lightSources = fLightSourceStack.back(); + plMetalLights* lightSources = fLightSourceStack.back(); fLightSourceStack.pop_back(); memcpy(&fLights, lightSources, sizeof(plMetalLights)); delete lightSources; @@ -2659,12 +2551,12 @@ void plMetalPipeline::PopCurrentLightSources() // Must be matched with call to IPopOverBaseLayer. 
plLayerInterface* plMetalPipeline::IPushOverBaseLayer(plLayerInterface* li) { - if( !li ) + if (!li) return nil; fOverLayerStack.emplace_back(li); - if( !fOverBaseLayer ) + if (!fOverBaseLayer) return fOverBaseLayer = li; fForceMatHandle = true; @@ -2678,7 +2570,7 @@ plLayerInterface* plMetalPipeline::IPushOverBaseLayer(plLayerInterface* li) // Should match calls to IPushOverBaseLayer. plLayerInterface* plMetalPipeline::IPopOverBaseLayer(plLayerInterface* li) { - if( !li ) + if (!li) return nil; fForceMatHandle = true; @@ -2696,13 +2588,12 @@ plLayerInterface* plMetalPipeline::IPopOverBaseLayer(plLayerInterface* li) // Must be matched by call to IPopOverAllLayer plLayerInterface* plMetalPipeline::IPushOverAllLayer(plLayerInterface* li) { - if( !li ) + if (!li) return nil; fOverLayerStack.push_back(li); - if( !fOverAllLayer ) - { + if (!fOverAllLayer) { fOverAllLayer = li; fOverAllLayer->Eval(fTime, fFrame, 0); return fOverAllLayer; @@ -2720,7 +2611,7 @@ plLayerInterface* plMetalPipeline::IPushOverAllLayer(plLayerInterface* li) // Should match calls to IPushOverAllLayer. plLayerInterface* plMetalPipeline::IPopOverAllLayer(plLayerInterface* li) { - if( !li ) + if (!li) return nil; fForceMatHandle = true; @@ -2736,7 +2627,7 @@ plLayerInterface* plMetalPipeline::IPopOverAllLayer(plLayerInterface* li) // Push a projected texture on as a piggy back. void plMetalPipeline::IPushProjPiggyBack(plLayerInterface* li) { - if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks ) + if (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) return; fPiggyBackStack.push_back(li); @@ -2748,7 +2639,7 @@ void plMetalPipeline::IPushProjPiggyBack(plLayerInterface* li) // Remove a projected texture from use as a piggy back. 
void plMetalPipeline::IPopProjPiggyBacks() { - if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks ) + if (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) return; fPiggyBackStack.resize(fMatPiggyBacks); @@ -2764,17 +2655,15 @@ void plMetalPipeline::IPushPiggyBacks(hsGMaterial* mat) { hsAssert(!fMatPiggyBacks, "Push/Pop Piggy mismatch"); - if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks ) + if (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) return; int i; - for( i = 0; i < mat->GetNumPiggyBacks(); i++ ) - { - if( !mat->GetPiggyBack(i) ) + for (i = 0; i < mat->GetNumPiggyBacks(); i++) { + if (!mat->GetPiggyBack(i)) continue; - if ((mat->GetPiggyBack(i)->GetMiscFlags() & hsGMatState::kMiscLightMap) - && IsDebugFlagSet(plPipeDbg::kFlagNoLightmaps)) + if ((mat->GetPiggyBack(i)->GetMiscFlags() & hsGMatState::kMiscLightMap) && IsDebugFlagSet(plPipeDbg::kFlagNoLightmaps)) continue; fPiggyBackStack.push_back(mat->GetPiggyBack(i)); @@ -2789,7 +2678,7 @@ void plMetalPipeline::IPushPiggyBacks(hsGMaterial* mat) // Matches IPushPiggyBacks. 
void plMetalPipeline::IPopPiggyBacks() { - if( fView.fRenderState & plPipeline::kRenderNoPiggyBacks ) + if (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) return; fPiggyBackStack.resize(fPiggyBackStack.size() - fMatPiggyBacks); @@ -2816,7 +2705,8 @@ size_t plMetalPipeline::ISetNumActivePiggyBacks() return fActivePiggyBacks = std::min(fMaxPiggyBacks, uint32_t(fPiggyBackStack.size())); } -struct plAVTexVert { +struct plAVTexVert +{ float fPos[2]; float fUv[2]; }; @@ -2833,8 +2723,8 @@ void plMetalPipeline::IPreprocessAvatarTextures() if (fClothingOutfits.size() == 0) return; - - plMipmap *itemBufferTex = nullptr; + + plMipmap* itemBufferTex = nullptr; for (size_t oIdx = 0; oIdx < fClothingOutfits.size(); oIdx++) { plClothingOutfit* co = fClothingOutfits[oIdx]; @@ -2848,8 +2738,8 @@ void plMetalPipeline::IPreprocessAvatarTextures() if (rt == nullptr) { rt = IGetNextAvRT(); - //we're about to add a texture that wasn't there before - //mark the material as dirty + // we're about to add a texture that wasn't there before + // mark the material as dirty plMetalMaterialShaderRef* ref = static_cast(co->fMaterial->GetDeviceRef()); if (ref) { ref->SetDirty(true); @@ -2859,23 +2749,23 @@ void plMetalPipeline::IPreprocessAvatarTextures() PushRenderTarget(rt); fDevice.CurrentRenderCommandEncoder()->setViewport({0, 0, static_cast(rt->GetWidth()), static_cast(rt->GetHeight()), 0.f, 1.f}); - + static MTL::RenderPipelineState* baseAvatarRenderState = nullptr; static MTL::RenderPipelineState* avatarRenderState = nullptr; - + if (!baseAvatarRenderState) { - //This is a bit of a hack, this really should be part of the plMetalDevice's function map. - //But that hash map assumes that it follows the vertex arrangement of the models. - //After a refactor, this function creation should go there. + // This is a bit of a hack, this really should be part of the plMetalDevice's function map. + // But that hash map assumes that it follows the vertex arrangement of the models. 
+ // After a refactor, this function creation should go there. MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init()->autorelease(); - MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary()->autorelease(); - + MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary()->autorelease(); + MTL::Function* vertFunction = library->newFunction(NS::MakeConstantString("PreprocessAvatarVertexShader"))->autorelease(); MTL::Function* fragFunction = library->newFunction(NS::MakeConstantString("PreprocessAvatarFragmentShader"))->autorelease(); - + descriptor->setVertexFunction(vertFunction); descriptor->setFragmentFunction(fragFunction); - + MTL::VertexDescriptor* vertexDescriptor = MTL::VertexDescriptor::vertexDescriptor(); vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(0)->setBufferIndex(0); @@ -2883,16 +2773,16 @@ void plMetalPipeline::IPreprocessAvatarTextures() vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(1)->setBufferIndex(0); vertexDescriptor->attributes()->object(1)->setOffset(sizeof(float) * 2); - + vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 4); - + descriptor->setVertexDescriptor(vertexDescriptor); - + descriptor->colorAttachments()->object(0)->setBlendingEnabled(false); descriptor->colorAttachments()->object(0)->setPixelFormat(MTL::PixelFormatBGRA8Unorm); NS::Error* error = nullptr; baseAvatarRenderState = fDevice.fMetalDevice->newRenderPipelineState(descriptor, &error); - + descriptor->colorAttachments()->object(0)->setBlendingEnabled(true); descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); @@ -2903,40 +2793,33 @@ void plMetalPipeline::IPreprocessAvatarTextures() float uOff = 0.5f / 
rt->GetWidth(); float vOff = 0.5f / rt->GetHeight(); - - plClothingLayout *layout = plClothingMgr::GetClothingMgr()->GetLayout(co->fBase->fLayoutName); - for (plClothingItem *item : co->fItems) - { - - for (size_t j = 0; j < item->fElements.size(); j++) - { - for (int k = 0; k < plClothingElement::kLayerMax; k++) - { + plClothingLayout* layout = plClothingMgr::GetClothingMgr()->GetLayout(co->fBase->fLayoutName); + + for (plClothingItem* item : co->fItems) { + for (size_t j = 0; j < item->fElements.size(); j++) { + for (int k = 0; k < plClothingElement::kLayerMax; k++) { if (item->fTextures[j][k] == nullptr) continue; - + itemBufferTex = item->fTextures[j][k]; hsColorRGBA tint = co->GetItemTint(item, k); if (k >= plClothingElement::kLayerSkinBlend1 && k <= plClothingElement::kLayerSkinLast) tint.a = co->fSkinBlends[k - plClothingElement::kLayerSkinBlend1]; - - if (k == plClothingElement::kLayerBase) - { - if(fState.fCurrentPipelineState != baseAvatarRenderState) { + + if (k == plClothingElement::kLayerBase) { + if (fState.fCurrentPipelineState != baseAvatarRenderState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(baseAvatarRenderState); fState.fCurrentPipelineState = baseAvatarRenderState; } - } - else - { - if(fState.fCurrentPipelineState != avatarRenderState) { + } else { + if (fState.fCurrentPipelineState != avatarRenderState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(avatarRenderState); fState.fCurrentPipelineState = avatarRenderState; } } fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&tint, sizeof(hsColorRGBA), 0); - + float screenW = (float)item->fElements[j]->fWidth / layout->fOrigWidth * 2.f; float screenH = (float)item->fElements[j]->fHeight / layout->fOrigWidth * 2.f; float screenX = (float)item->fElements[j]->fXPos / layout->fOrigWidth * 2.f - 1.f; @@ -2956,20 +2839,18 @@ void plMetalPipeline::IPreprocessAvatarTextures() } void plMetalPipeline::IDrawClothingQuad(float x, float y, float w, float h, - float 
uOff, float vOff, plMipmap *tex) + float uOff, float vOff, plMipmap* tex) { - const uint32_t kVSize = sizeof(plAVTexVert); + const uint32_t kVSize = sizeof(plAVTexVert); plMetalTextureRef* ref = (plMetalTextureRef*)tex->GetDeviceRef(); - if (!ref || ref->IsDirty()) - { + if (!ref || ref->IsDirty()) { CheckTextureRef(tex); ref = (plMetalTextureRef*)tex->GetDeviceRef(); } - if (!ref->fTexture) - { + if (!ref->fTexture) { IReloadTexture(tex, ref); } - hsRefCnt_SafeAssign( fLayerRef[0], ref ); + hsRefCnt_SafeAssign(fLayerRef[0], ref); fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 0); plAVTexVert ptr[4]; @@ -2998,28 +2879,28 @@ void plMetalPipeline::IDrawClothingQuad(float x, float y, float w, float h, ptr[3] = vert; ptr[3].fPos[1] += h; ptr[3].fUv[1] -= 1.f; - + fDevice.CurrentRenderCommandEncoder()->setVertexBytes(ptr, sizeof(ptr), 0); fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveType::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } -void plMetalPipeline::FindFragFunction() { - MTL::Library *library = fDevice.fMetalDevice->newDefaultLibrary(); - - NS::Error *error = nullptr; - - MTL::FunctionConstantValues *functionContents = MTL::FunctionConstantValues::alloc()->init(); - short numUVs=1; +void plMetalPipeline::FindFragFunction() +{ + MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary(); + + NS::Error* error = nullptr; + + MTL::FunctionConstantValues* functionContents = MTL::FunctionConstantValues::alloc()->init(); + short numUVs = 1; functionContents->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumUVs); functionContents->setConstantValue(&numUVs, MTL::DataTypeUShort, FunctionConstantNumLayers); - - MTL::Function *fragFunction = library->newFunction( - NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), - functionContents, - &error - ); + + MTL::Function* fragFunction = library->newFunction( + NS::String::string("pipelineFragmentShader", 
NS::ASCIIStringEncoding), + functionContents, + &error); fFragFunction = fragFunction; - + functionContents->release(); library->release(); } @@ -3035,8 +2916,7 @@ void plMetalPipeline::FindFragFunction() { void plMetalPipeline::IClearShadowSlaves() { int i; - for( i = 0; i < fShadows.size(); i++ ) - { + for (i = 0; i < fShadows.size(); i++) { const plShadowCaster* caster = fShadows[i]->fCaster; caster->GetKey()->UnRefObject(); } @@ -3047,7 +2927,7 @@ void plMetalPipeline::IClearShadowSlaves() bool plMetalPipeline::ICreateDynDeviceObjects() { // Front/Back/Depth buffers - //if( ICreateNormalSurfaces() ) + // if( ICreateNormalSurfaces() ) // return true; // RenderTarget pools are shared for our shadow generation algorithm. @@ -3056,7 +2936,7 @@ bool plMetalPipeline::ICreateDynDeviceObjects() // Create device-specific stuff fDebugTextMgr = new plDebugTextManager(); - if( fDebugTextMgr == nil ) + if (fDebugTextMgr == nil) return true; // Vertex buffers, index buffers, textures, etc. @@ -3075,12 +2955,11 @@ void plMetalPipeline::IReleaseDynDeviceObjects() // themselves from their parent objects yet delete fDebugTextMgr; fDebugTextMgr = nil; - - while( fTextFontRefList ) + + while (fTextFontRefList) delete fTextFontRefList; - while( fRenderTargetRefList ) - { + while (fRenderTargetRefList) { plMetalRenderTargetRef* rtRef = fRenderTargetRefList; rtRef->Release(); rtRef->Unlink(); @@ -3089,9 +2968,8 @@ void plMetalPipeline::IReleaseDynDeviceObjects() // The shared dynamic vertex buffers used by things like objects skinned on CPU, or // particle systems. 
IReleaseDynamicBuffers(); - //IReleaseAvRTPool(); + // IReleaseAvRTPool(); IReleaseRenderTargetPools(); - } // IReleaseDynamicBuffers ///////////////////////////////////////////////// @@ -3111,46 +2989,40 @@ void plMetalPipeline::IReleaseRenderTargetPools() { int i; - for( i = 0; i < fRenderTargetPool512.size(); i++ ) - { + for (i = 0; i < fRenderTargetPool512.size(); i++) { delete fRenderTargetPool512[i]; fRenderTargetPool512[i] = nil; } fRenderTargetPool512.clear(); - for( i = 0; i < fRenderTargetPool256.size(); i++ ) - { + for (i = 0; i < fRenderTargetPool256.size(); i++) { delete fRenderTargetPool256[i]; fRenderTargetPool256[i] = nil; } fRenderTargetPool256.clear(); - for( i = 0; i < fRenderTargetPool128.size(); i++ ) - { + for (i = 0; i < fRenderTargetPool128.size(); i++) { delete fRenderTargetPool128[i]; fRenderTargetPool128[i] = nil; } fRenderTargetPool128.clear(); - for( i = 0; i < fRenderTargetPool64.size(); i++ ) - { + for (i = 0; i < fRenderTargetPool64.size(); i++) { delete fRenderTargetPool64[i]; fRenderTargetPool64[i] = nil; } fRenderTargetPool64.clear(); - for( i = 0; i < fRenderTargetPool32.size(); i++ ) - { + for (i = 0; i < fRenderTargetPool32.size(); i++) { delete fRenderTargetPool32[i]; fRenderTargetPool32[i] = nil; } fRenderTargetPool32.clear(); - for( i = 0; i < kMaxRenderTargetNext; i++ ) - { + for (i = 0; i < kMaxRenderTargetNext; i++) { fRenderTargetNext[i] = 0; - //fBlurScratchRTs[i] = nil; - //fBlurDestRTs[i] = nil; + // fBlurScratchRTs[i] = nil; + // fBlurDestRTs[i] = nil; } #ifdef MF_ENABLE_HACKOFF @@ -3164,10 +3036,8 @@ void plMetalPipeline::IReleaseRenderTargetPools() /////////////////////////////////////////////////////////////////////////////// // See plGLight/plShadowMaster.cpp for more notes. 
- - -float blurScale = -1.f; -static const int kL2NumSamples = 3; // Log2(4) +float blurScale = -1.f; +static const int kL2NumSamples = 3; // Log2(4) // IPrepShadowCaster //////////////////////////////////////////////////////////////////////// // Make sure all the geometry in this shadow caster is ready to be rendered. @@ -3189,10 +3059,8 @@ bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) const std::vector& castSpans = caster->Spans(); int i; - for( i = 0; i < castSpans.size(); i++ ) - { - if( !done.IsBitSet(i) ) - { + for (i = 0; i < castSpans.size(); i++) { + if (!done.IsBitSet(i)) { // We haven't already done this castSpan plDrawableSpans* drawable = castSpans[i].fDraw; @@ -3201,7 +3069,7 @@ bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) static std::vector visList; visList.clear(); visList.push_back((int16_t)(castSpans[i].fIndex)); - + // We're about to have done this castSpan. done.SetBit(i); @@ -3209,10 +3077,8 @@ bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) // with the same drawable, and add them to visList. // We'll handle all the spans from this drawable at once. int j; - for( j = i+1; j < castSpans.size(); j++ ) - { - if( !done.IsBitSet(j) && (castSpans[j].fDraw == drawable) ) - { + for (j = i + 1; j < castSpans.size(); j++) { + if (!done.IsBitSet(j) && (castSpans[j].fDraw == drawable)) { // Add to list visList.push_back((int16_t)(castSpans[j].fIndex)); @@ -3221,10 +3087,10 @@ bool plMetalPipeline::IPrepShadowCaster(const plShadowCaster* caster) } } // That's all, prep the drawable. - drawable->PrepForRender( this ); + drawable->PrepForRender(this); // Do any software skinning. - if( !ISoftwareVertexBlend(drawable, visList) ) + if (!ISoftwareVertexBlend(drawable, visList)) return false; } } @@ -3239,47 +3105,45 @@ bool plMetalPipeline::IRenderShadowCaster(plShadowSlave* slave) const plShadowCaster* caster = slave->fCaster; // Setup to render into the slave's render target. 
- if( !IPushShadowCastState(slave) ) + if (!IPushShadowCastState(slave)) return false; // Get the shadow caster ready to render. - if( !IPrepShadowCaster(slave->fCaster) ) + if (!IPrepShadowCaster(slave->fCaster)) return false; // for each shadowCaster.fSpans int iSpan; - for( iSpan = 0; iSpan < caster->Spans().size(); iSpan++ ) - { + for (iSpan = 0; iSpan < caster->Spans().size(); iSpan++) { plDrawableSpans* dr = caster->Spans()[iSpan].fDraw; - const plSpan* sp = caster->Spans()[iSpan].fSpan; - uint32_t spIdx = caster->Spans()[iSpan].fIndex; + const plSpan* sp = caster->Spans()[iSpan].fSpan; + uint32_t spIdx = caster->Spans()[iSpan].fIndex; hsAssert(sp->fTypeMask & plSpan::kIcicleSpan, "Shadow casting from non-trimeshes not currently supported"); // render shadowcaster.fSpans[i] to rendertarget - if( !(sp->fProps & plSpan::kPropNoShadowCast) ) + if (!(sp->fProps & plSpan::kPropNoShadowCast)) IRenderShadowCasterSpan(slave, dr, *(const plIcicle*)sp); // Keep track of which shadow slaves this span was rendered into. // If self-shadowing is off, we use that to determine not to // project the shadow map onto its source geometry. - sp->SetShadowBit(slave->fIndex); //index set in SubmitShadowSlave + sp->SetShadowBit(slave->fIndex); // index set in SubmitShadowSlave } // Debug only. - if( blurScale >= 0.f ) + if (blurScale >= 0.f) slave->fBlurScale = blurScale; // If this shadow requests being blurred, do it. - if( slave->fBlurScale > 0.f ) + if (slave->fBlurScale > 0.f) fDevice.EncodeBlur(fDevice.GetCurrentCommandBuffer(), fDevice.fCurrentFragmentOutputTexture, slave->fBlurScale); - + // Finished up, restore previous state. 
IPopShadowCastState(slave); #if MCN_BOUNDS_SPANS - if (IsDebugFlagSet(plPipeDbg::kFlagShowShadowBounds)) - { + if (IsDebugFlagSet(plPipeDbg::kFlagShowShadowBounds)) { /// Add a span to our boundsIce to show this IAddBoundsSpan(fBoundsSpans, &slave->fWorldBounds); } @@ -3302,7 +3166,7 @@ void plMetalPipeline::IPreprocessShadows() // Some board (possibly the Parhelia) freaked if anistropic filtering // was enabled when rendering to a render target. We never need it for // shadow maps, and it is slower, so we just kill it here. - //ISetAnisotropy(false); + // ISetAnisotropy(false); // Generate a shadow map for each submitted shadow slave. // Shadow slave corresponds to one shadow caster paired @@ -3313,22 +3177,19 @@ void plMetalPipeline::IPreprocessShadows() // permutation explosion, because a slave is only generated // for a caster being affected (in range etc.) by a light. int iSlave; - for( iSlave = 0; iSlave < fShadows.size(); iSlave++ ) - { + for (iSlave = 0; iSlave < fShadows.size(); iSlave++) { plShadowSlave* slave = fShadows[iSlave]; - + // Any trouble, remove it from the list for this frame. - if( !IRenderShadowCaster(slave) ) - { + if (!IRenderShadowCaster(slave)) { fShadows.erase(fShadows.begin() + iSlave); iSlave--; continue; } - } // Restore - //ISetAnisotropy(true); + // ISetAnisotropy(true); plProfile_EndTiming(PrepShadows); } @@ -3339,23 +3200,23 @@ void plMetalPipeline::IPreprocessShadows() bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) { plRenderTarget* renderTarg = IFindRenderTarget(slave->fWidth, slave->fHeight, slave->fView.GetOrthogonal()); - if( !renderTarg ) + if (!renderTarg) return false; // Let the slave setup the transforms, viewport, etc. necessary to render it's shadow // map. This just goes into a plViewTransform, we translate that into D3D state ourselves below. 
if (!slave->SetupViewTransform(this)) return false; - + // Set texture to U_LUT fCurrentRenderPassUniforms->specularSrc = 0.0; - //if( !ref->fTexture ) + // if( !ref->fTexture ) //{ - // if( ref->fData ) - // IReloadTexture( ref ); - //} - //fDevice.SetRenderTarget(ref->fTexture); + // if( ref->fData ) + // IReloadTexture( ref ); + // } + // fDevice.SetRenderTarget(ref->fTexture); // Push the shadow slave's view transform as our current render state. fViewStack.push(fView); @@ -3369,13 +3230,12 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) // We'll be rendering the light space distance to the span fragment into // alpha (color is white), so our camera space position, transformed into light space // and then converted to [0..255] via our ULut. - - //METAL NOTE: D3DTSS_TCI_CAMERASPACEPOSITION and D3DTTFF_COUNT3 are hardcoded into the shader + + // METAL NOTE: D3DTSS_TCI_CAMERASPACEPOSITION and D3DTTFF_COUNT3 are hardcoded into the shader // Set texture transform to slave's lut transform. See plShadowMaster::IComputeLUT(). 
hsMatrix44 castLUT = slave->fCastLUT; - if( slave->fFlags & plShadowSlave::kCastInCameraSpace ) - { + if (slave->fFlags & plShadowSlave::kCastInCameraSpace) { hsMatrix44 c2w = GetCameraToWorld(); castLUT = castLUT * c2w; @@ -3431,39 +3291,36 @@ bool plMetalPipeline::IPushShadowCastState(plShadowSlave* slave) fD3DDevice->SetTextureStageState(1, D3DTSS_ALPHAOP, D3DTOP_DISABLE); fLayerState[1].fBlendFlags = uint32_t(-1);*/ - //fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE); - //fD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE); - //fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO); + // fD3DDevice->SetRenderState(D3DRS_ALPHABLENDENABLE, TRUE); + // fD3DDevice->SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE); + // fD3DDevice->SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ZERO); - //fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS); + // fD3DDevice->SetRenderState(D3DRS_ALPHAFUNC, D3DCMP_ALWAYS); slave->fPipeData = renderTarg; // Enable ZBuffering w/ write - //fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE); - // fLayerState[0].fZFlags &= ~hsGMatState::kZMask; + // fD3DDevice->SetRenderState(D3DRS_ZWRITEENABLE, TRUE); + // fLayerState[0].fZFlags &= ~hsGMatState::kZMask; // Clear the render target: // alpha to white ensures no shadow where there's no caster // color to black in case we ever get blurring going // Z to 1 // Stencil ignored - if( slave->ReverseZ() ) - { + if (slave->ReverseZ()) { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fReverseZStencilState); - //fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATEREQUAL); - //fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 0.0f, 0L); - } - else - { + // fD3DDevice->SetRenderState(D3DRS_ZFUNC, D3DCMP_GREATEREQUAL); + // fD3DDevice->Clear(0, nil, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, clearColor, 0.0f, 0L); + } else { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fDefaultStencilState); } // Bring the viewport in (AFTER 
THE CLEAR) to protect the alpha boundary. - fView.GetViewTransform().SetViewPort(1, 1, (float)(slave->fWidth-2), (float)(slave->fHeight-2), false); + fView.GetViewTransform().SetViewPort(1, 1, (float)(slave->fWidth - 2), (float)(slave->fHeight - 2), false); fDevice.SetViewport(); - //inlEnsureLightingOff(); + // inlEnsureLightingOff(); return true; } @@ -3484,27 +3341,23 @@ void plMetalPipeline::ISetupShadowState(plShadowSlave* slave, plShadowState& sha slave->fSelfShadowOn = false; - if( slave->Positional() ) - { + if (slave->Positional()) { hsPoint3 position = slave->fLightPos; shadowState.lightPosition.x = position.fX; shadowState.lightPosition.y = position.fY; shadowState.lightPosition.z = position.fZ; shadowState.directional = false; - } - else - { + } else { hsVector3 dir = slave->fLightDir; shadowState.lightDirection.x = dir.fX; shadowState.lightDirection.y = dir.fY; shadowState.lightDirection.z = dir.fZ; - + shadowState.directional = true; } } - // IFindRenderTarget ////////////////////////////////////////////////////////////////// // Find a matching render target from the pools. We prefer the requested size, but // will look for a smaller size if there isn't one available. @@ -3513,38 +3366,36 @@ void plMetalPipeline::ISetupShadowState(plShadowSlave* slave, plShadowState& sha plRenderTarget* plMetalPipeline::IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho) { std::vector* pool = nil; - uint32_t* iNext = nil; + uint32_t* iNext = nil; // NOT CURRENTLY SUPPORTING NON-SQUARE SHADOWS. IF WE DO, CHANGE THIS. 
- switch(height) - { - case 512: - pool = &fRenderTargetPool512; - iNext = &fRenderTargetNext[9]; - break; - case 256: - pool = &fRenderTargetPool256; - iNext = &fRenderTargetNext[8]; - break; - case 128: - pool = &fRenderTargetPool128; - iNext = &fRenderTargetNext[7]; - break; - case 64: - pool = &fRenderTargetPool64; - iNext = &fRenderTargetNext[6]; - break; - case 32: - pool = &fRenderTargetPool32; - iNext = &fRenderTargetNext[5]; - break; - default: - return nil; + switch (height) { + case 512: + pool = &fRenderTargetPool512; + iNext = &fRenderTargetNext[9]; + break; + case 256: + pool = &fRenderTargetPool256; + iNext = &fRenderTargetNext[8]; + break; + case 128: + pool = &fRenderTargetPool128; + iNext = &fRenderTargetNext[7]; + break; + case 64: + pool = &fRenderTargetPool64; + iNext = &fRenderTargetNext[6]; + break; + case 32: + pool = &fRenderTargetPool32; + iNext = &fRenderTargetNext[5]; + break; + default: + return nil; } plRenderTarget* rt = (*pool)[*iNext]; - if( !rt ) - { + if (!rt) { // We didn't find one, try again the next size down. - if( height > 32 ) + if (height > 32) return IFindRenderTarget(width >>= 1, height >>= 1, ortho); // We must be totally out. Oh well. @@ -3562,21 +3413,21 @@ plRenderTarget* plMetalPipeline::IFindRenderTarget(uint32_t& width, uint32_t& he // that wants the depth buffer dimensions to match the color buffer size. // It may be that NVidia hardware doesn't care any more. Contact Matthias // about that. 
-hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner) +hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget* owner) { - plMetalRenderTargetRef* ref = nil; - MTL::Texture* depthSurface = nil; - MTL::Texture* texture = nil; - MTL::Texture* cTexture = nil; + plMetalRenderTargetRef* ref = nil; + MTL::Texture* depthSurface = nil; + MTL::Texture* texture = nil; + MTL::Texture* cTexture = nil; int i; plCubicRenderTarget* cubicRT; - uint16_t width, height; + uint16_t width, height; // If we don't already have one to share from, start from scratch. - if( !share ) + if (!share) return MakeRenderTargetRef(owner); - //hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd"); + // hsAssert(!fManagedAlloced, "Allocating non-managed resource with managed resources alloc'd"); #ifdef HS_DEBUGGING // Check out the validity of the match. Debug only. @@ -3588,25 +3439,23 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe #endif // HS_DEBUGGING /// Check--is this renderTarget really a child of a cubicRenderTarget? - if( owner->GetParent() != nil ) - { + if (owner->GetParent() != nil) { /// This'll create the deviceRefs for all of its children as well SharedRenderTargetRef(share->GetParent(), owner->GetParent()); return owner->GetDeviceRef(); } - if( owner->GetDeviceRef() != nil ) - ref = (plMetalRenderTargetRef *)owner->GetDeviceRef(); + if (owner->GetDeviceRef() != nil) + ref = (plMetalRenderTargetRef*)owner->GetDeviceRef(); // Look for a good format of matching color and depth size. 
- //FIXME: we're hardcoded for a certain tier and we aren't trying to create matching render buffers for efficiency - //if( !IFindRenderTargetInfo(owner, surfFormat, resType) ) + // FIXME: we're hardcoded for a certain tier and we aren't trying to create matching render buffers for efficiency + // if( !IFindRenderTargetInfo(owner, surfFormat, resType) ) //{ // hsAssert( false, "Error getting renderTarget info" ); // return nil; //} - /// Create the render target now // Start with the depth. We're just going to share the depth surface on the // input shareRef. @@ -3614,119 +3463,102 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe hsAssert(shareRef, "Trying to share from a render target with no ref"); depthSurface = shareRef->fDepthBuffer; - //FIXME: Add the usage to these textures, they're only accessed by the GPU - // Check for Cubic. This is unlikely, since this function is currently only - // used for the shadow map pools. - cubicRT = plCubicRenderTarget::ConvertNoRef( owner ); - if( cubicRT != nil ) - { + // FIXME: Add the usage to these textures, they're only accessed by the GPU + // Check for Cubic. This is unlikely, since this function is currently only + // used for the shadow map pools. 
+ cubicRT = plCubicRenderTarget::ConvertNoRef(owner); + if (cubicRT != nil) { /// And create the ref (it'll know how to set all the flags) - if( ref != nil ) + if (ref != nil) ref->SetOwner(owner); else { ref = new plMetalRenderTargetRef(); ref->SetOwner(owner); } - - MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatRGBA8Uint, owner->GetWidth(), false); - MTL::Texture* cubeTexture = fDevice.fMetalDevice->newTexture(textureDescriptor); - // hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd"); - if( cubeTexture ) - { + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::textureCubeDescriptor(MTL::PixelFormatRGBA8Uint, owner->GetWidth(), false); + MTL::Texture* cubeTexture = fDevice.fMetalDevice->newTexture(textureDescriptor); + // hsAssert(!fManagedAlloced, "Alloc default with managed alloc'd"); + if (cubeTexture) { /// Create a CUBIC texture - for( i = 0; i < 6; i++ ) - { - plRenderTarget *face = cubicRT->GetFace( i ); - plMetalRenderTargetRef *fRef; - - if( face->GetDeviceRef() != nil ) - { - fRef = (plMetalRenderTargetRef *)face->GetDeviceRef(); + for (i = 0; i < 6; i++) { + plRenderTarget* face = cubicRT->GetFace(i); + plMetalRenderTargetRef* fRef; + + if (face->GetDeviceRef() != nil) { + fRef = (plMetalRenderTargetRef*)face->GetDeviceRef(); fRef->SetOwner(face); - if( !fRef->IsLinked() ) - fRef->Link( &fRenderTargetRefList ); - } - else - { + if (!fRef->IsLinked()) + fRef->Link(&fRenderTargetRefList); + } else { plMetalRenderTargetRef* targetRef = new plMetalRenderTargetRef(); targetRef->SetOwner(face); - face->SetDeviceRef( targetRef ); - ( (plMetalRenderTargetRef *)face->GetDeviceRef())->Link( &fRenderTargetRefList ); + face->SetDeviceRef(targetRef); + ((plMetalRenderTargetRef*)face->GetDeviceRef())->Link(&fRenderTargetRefList); // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) - hsRefCnt_SafeUnRef( face->GetDeviceRef() ); + 
hsRefCnt_SafeUnRef(face->GetDeviceRef()); } } ref->fTexture = cubeTexture; - } - else - { + } else { hsRefCnt_SafeUnRef(ref); ref = nil; } } // Is it a texture render target? Probably, since shadow maps are all we use this for. - else if( owner->GetFlags() & plRenderTarget::kIsTexture || owner->GetFlags() & plRenderTarget::kIsOffscreen) - { - //DX seperated the onscreen and offscreen types. Metal doesn't care. All render targets are textures. + else if (owner->GetFlags() & plRenderTarget::kIsTexture || owner->GetFlags() & plRenderTarget::kIsOffscreen) { + // DX seperated the onscreen and offscreen types. Metal doesn't care. All render targets are textures. /// Create a normal texture - if( ref != nil ) + if (ref != nil) ref->SetOwner(owner); else { ref = new plMetalRenderTargetRef(); ref->SetOwner(owner); } - + MTL::TextureDescriptor* textureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatBGRA8Unorm, owner->GetWidth(), owner->GetHeight(), false); - //Give compute shader write access + // Give compute shader write access textureDescriptor->setUsage(MTL::TextureUsageRenderTarget | MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); MTL::Texture* texture = fDevice.fMetalDevice->newTexture(textureDescriptor); - if( texture ) - { + if (texture) { ref->fTexture = texture; - } - else - { + } else { hsRefCnt_SafeUnRef(ref); ref = nil; } - + if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) { - MTL::TextureDescriptor *depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, + MTL::TextureDescriptor* depthTextureDescriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatDepth32Float_Stencil8, owner->GetWidth(), owner->GetHeight(), false); - + if (fDevice.fMetalDevice->supportsFamily(MTL::GPUFamilyApple1)) { depthTextureDescriptor->setStorageMode(MTL::StorageModeMemoryless); - } else { + } else { 
depthTextureDescriptor->setStorageMode(MTL::StorageModePrivate); } depthTextureDescriptor->setUsage(MTL::TextureUsageRenderTarget); - MTL::Texture *depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor); + MTL::Texture* depthBuffer = fDevice.fMetalDevice->newTexture(depthTextureDescriptor); ref->fDepthBuffer = depthBuffer; } } - if( owner->GetDeviceRef() != ref ) - { - owner->SetDeviceRef( ref ); + if (owner->GetDeviceRef() != ref) { + owner->SetDeviceRef(ref); // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) - hsRefCnt_SafeUnRef( ref ); - if( ref != nil && !ref->IsLinked() ) - ref->Link( &fRenderTargetRefList ); - } - else - { - if( ref != nil && !ref->IsLinked() ) - ref->Link( &fRenderTargetRefList ); + hsRefCnt_SafeUnRef(ref); + if (ref != nil && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); + } else { + if (ref != nil && !ref->IsLinked()) + ref->Link(&fRenderTargetRefList); } - if( ref != nil ) - { - ref->SetDirty( false ); + if (ref != nil) { + ref->SetDirty(false); } return ref; @@ -3739,68 +3571,64 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe // must be created before we start creating things in POOL_MANAGED. void plMetalPipeline::IMakeRenderTargetPools() { - //FIXME: We should probably have a release function for the render target pools - //IReleaseRenderTargetPools(); // Just to be sure. + // FIXME: We should probably have a release function for the render target pools + // IReleaseRenderTargetPools(); // Just to be sure. // Numbers of render targets to be created for each size. // These numbers were set with multi-player in mind, so should be reconsidered. // But do keep in mind that there are many things in production assets that cast // shadows besides the avatar. 
- plConst(float) kCount[kMaxRenderTargetNext] = { - 0, // 1x1 - 0, // 2x2 - 0, // 4x4 - 0, // 8x8 - 0, // 16x16 + plConst(float) kCount[kMaxRenderTargetNext] = { + 0, // 1x1 + 0, // 2x2 + 0, // 4x4 + 0, // 8x8 + 0, // 16x16 32, // 32x32 16, // 64x64 - 8, // 128x128 - 4, // 256x256 - 0 // 512x512 + 8, // 128x128 + 4, // 256x256 + 0 // 512x512 }; int i; - for( i = 0; i < kMaxRenderTargetNext; i++ ) - { + for (i = 0; i < kMaxRenderTargetNext; i++) { std::vector* pool = nil; - switch( i ) - { - default: - case 0: - case 1: - case 2: - case 3: - case 4: - break; + switch (i) { + default: + case 0: + case 1: + case 2: + case 3: + case 4: + break; - case 5: - pool = &fRenderTargetPool32; - break; - case 6: - pool = &fRenderTargetPool64; - break; - case 7: - pool = &fRenderTargetPool128; - break; - case 8: - pool = &fRenderTargetPool256; - break; - case 9: - pool = &fRenderTargetPool512; - break; + case 5: + pool = &fRenderTargetPool32; + break; + case 6: + pool = &fRenderTargetPool64; + break; + case 7: + pool = &fRenderTargetPool128; + break; + case 8: + pool = &fRenderTargetPool256; + break; + case 9: + pool = &fRenderTargetPool512; + break; } - if( pool ) - { + if (pool) { pool->resize(kCount[i] + 1); (*pool)[0] = nil; (*pool)[(int)(kCount[i])] = nil; int j; - for( j = 0; j < kCount[i]; j++ ) - { + for (j = 0; j < kCount[i]; j++) { uint16_t flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected; - uint8_t bitDepth = 32; - uint8_t zDepth = 24; - uint8_t stencilDepth = 0; - + uint8_t bitDepth = 32; + uint8_t zDepth = 24; + uint8_t stencilDepth = 0; + // If we ever allow non-square shadows, change this. int width = 1 << i; int height = width; @@ -3810,10 +3638,9 @@ void plMetalPipeline::IMakeRenderTargetPools() // If we've failed to create our render target ref, we're probably out of // video memory. We'll return nil, and this guy just doesn't get a shadow // until more video memory turns up (not likely). 
- if( !SharedRenderTargetRef((*pool)[0], rt) ) - { + if (!SharedRenderTargetRef((*pool)[0], rt)) { delete rt; - pool->resize(j+1); + pool->resize(j + 1); (*pool)[j] = nil; break; } @@ -3833,7 +3660,7 @@ bool plMetalPipeline::IPopShadowCastState(plShadowSlave* slave) PopRenderTarget(); fView.fXformResetFlags = fView.kResetProjection | fView.kResetCamera; - + fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(nullptr, 16); return true; @@ -3846,14 +3673,13 @@ bool plMetalPipeline::IPopShadowCastState(plShadowSlave* slave) void plMetalPipeline::IResetRenderTargetPools() { int i; - for( i = 0; i < kMaxRenderTargetNext; i++ ) - { + for (i = 0; i < kMaxRenderTargetNext; i++) { fRenderTargetNext[i] = 0; - //fBlurScratchRTs[i] = nil; - //fBlurDestRTs[i] = nil; + // fBlurScratchRTs[i] = nil; + // fBlurDestRTs[i] = nil; } - //fLights.fNextShadowLight = 0; + // fLights.fNextShadowLight = 0; } // IRenderShadowCasterSpan ////////////////////////////////////////////////////////////////////// @@ -3866,41 +3692,40 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp ICheckDynBuffers(drawable, drawable->GetBufferGroup(span.fGroupIdx), &span); plProfile_EndTiming(CheckDyn); - plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef *)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx); - plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef *)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx); + plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx); + plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx); - if( vRef->GetBuffer() == nil || iRef->GetBuffer() == nil ) - { - hsAssert( false, "Trying to render a nil buffer pair!" 
); + if (vRef->GetBuffer() == nil || iRef->GetBuffer() == nil) { + hsAssert(false, "Trying to render a nil buffer pair!"); return; } /// Switch to the vertex buffer we want plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowCasterPipelineState(&fDevice, vRef).GetRenderPipelineState(); - if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { + if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fState.fCurrentPipelineState = linkedPipeline->pipelineState; } - + if (fState.fCurrentVertexBuffer != vRef->GetBuffer()) { fDevice.CurrentRenderCommandEncoder()->setVertexBuffer(vRef->GetBuffer(), 0, 0); fState.fCurrentVertexBuffer = vRef->GetBuffer(); } - + fState.fCurrentVertexBuffer = vRef->GetBuffer(); fDevice.fCurrentIndexBuffer = iRef->GetBuffer(); fState.fCurrentCullMode = MTL::CullModeNone; fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); - uint32_t vStart = span.fVStartIdx; - uint32_t vLength = span.fVLength; - uint32_t iStart = span.fIPackedIdx; - uint32_t iLength= span.fILength; + uint32_t vStart = span.fVStartIdx; + uint32_t vLength = span.fVLength; + uint32_t iStart = span.fIPackedIdx; + uint32_t iLength = span.fILength; plRenderTriListFunc render(&fDevice, 0, vStart, vLength, iStart, iLength); static hsMatrix44 emptyMatrix; - hsMatrix44 m = emptyMatrix; + hsMatrix44 m = emptyMatrix; ISetupTransforms(drawable, span, m); @@ -3910,76 +3735,70 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp render.RenderPrims(); } - // IRenderShadowsOntoSpan ///////////////////////////////////////////////////////////////////// // After doing the usual render for a span (all passes), we call the following. // If the span accepts shadows, this will loop over all the shadows active this // frame, and apply the ones that intersect this spans bounds. See below for details. 
-void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef *vRef) +void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef* vRef) { // We've already computed which shadows affect this span. That's recorded in slaveBits. const hsBitVector& slaveBits = span->GetShadowSlaves(); bool first = true; - for(size_t i = 0; i < fShadows.size(); i++ ) - { - if( slaveBits.IsBitSet(fShadows[i]->fIndex) ) - { + for (size_t i = 0; i < fShadows.size(); i++) { + if (slaveBits.IsBitSet(fShadows[i]->fIndex)) { // This slave affects this span. - if( first ) - { - + if (first) { // On the first, we do all the setup that is independent of // the shadow slave, so state that needs to get set once before // projecting any number of shadow maps. ISetupShadowRcvTextureStages(mat); first = false; - } // Now setup any state specific to this shadow slave. ISetupShadowSlaveTextures(fShadows[i]); - + // See ISetupShadowLight below for how the shadow light is used. // The shadow light isn't used in generating the shadow map, it's used // in projecting the shadow map onto the scene. plShadowState shadowState; ISetupShadowState(fShadows[i], shadowState); - + struct plMetalFragmentShaderDescription passDescription; memset(&passDescription, 0, sizeof(passDescription)); - + passDescription.numLayers = fCurrNumLayers = 3; - + /* Things get a wee bit complicated here. - + The texture we want to alpha blend with is already bound to texture 0 or texture 1. However - the texture co-ords we want are in position 2 in the FVF vertex buffer. (stage 3) - + Build the shader with texture descriptions set properly for textures 0 and 1, but put the instructions on how to treat the UVW for textures 0 or 1 into the third stage. - + The shadow cast shader will automatically look in textures 0 and 1 when doing the third stage blend. This saves us a texture bind. 
*/ - + passDescription.PopulateTextureInfo(mat->GetLayer(0), 0); passDescription.Populate(mat->GetLayer(0), 2); - - if (mat->GetNumLayers()>1) { + + if (mat->GetNumLayers() > 1) { passDescription.PopulateTextureInfo(mat->GetLayer(1), 1); passDescription.Populate(mat->GetLayer(1), 2); } - //There's no texture for the third stage if we're reusing the textures - //for the first and second stages from the last render. + // There's no texture for the third stage if we're reusing the textures + // for the first and second stages from the last render. passDescription.passTypes[2] = PassTypeColor; - - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); - if(fState.fCurrentPipelineState != linkedPipeline->pipelineState) { + + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); + if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); fState.fCurrentPipelineState = linkedPipeline->pipelineState; } @@ -3990,23 +3809,18 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con // so we cache whether the shadow light is set for regular or // self shadowing intensity. If what we're doing now is different // than what we're currently set for, set it again. - if( selfShadowNow != fShadows[i]->fSelfShadowOn ) - { - + if (selfShadowNow != fShadows[i]->fSelfShadowOn) { // We lower the power on self shadowing, because the artists like to // crank up the shadow strength to huge values to get a darker shadow // on the environment, which causes the shadow on the avatar to get // way too dark. 
Another way to look at it is when self shadowing, // the surface being projected onto is going to be very close to // the surface casting the shadow (because they are the same object). - if( selfShadowNow ) - { + if (selfShadowNow) { plConst(float) kMaxSelfPower = 0.3f; - float power = (float) fShadows[i]->fPower > kMaxSelfPower ? (float) kMaxSelfPower : ((float) fShadows[i]->fPower); + float power = (float)fShadows[i]->fPower > kMaxSelfPower ? (float)kMaxSelfPower : ((float)fShadows[i]->fPower); shadowState.power = power; - } - else - { + } else { shadowState.power = fShadows[i]->fPower; } @@ -4020,23 +3834,20 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con if (!IsDebugFlagSet(plPipeDbg::kFlagNoShadowApply)) #endif // PLASMA_EXTERNAL_RELEASE render.RenderPrims(); - } } - } - // ISetupShadowRcvTextureStages //////////////////////////////////////////// // Set the generic stage states. We'll fill in the specific textures // for each slave later. void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) { - //Do this first, this normally stomps all over our uniforms - //FIXME: Way to encode layers without stomping all over uniforms? - plMetalMaterialShaderRef* matShader = (plMetalMaterialShaderRef *)mat->GetDeviceRef(); - //matShader->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, 0, 0, nullptr); - + // Do this first, this normally stomps all over our uniforms + // FIXME: Way to encode layers without stomping all over uniforms? + plMetalMaterialShaderRef* matShader = (plMetalMaterialShaderRef*)mat->GetDeviceRef(); + // matShader->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, 0, 0, nullptr); + // We're whacking about with renderstate independent of current material, // so make sure the next span processes it's material, even if it's the // same one. 
@@ -4046,46 +3857,38 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) ISetShadowLightState(mat); // Zbuffering on read-only - - - if(fState.fCurrentDepthStencilState != fDevice.fNoZWriteStencilState) { + + if (fState.fCurrentDepthStencilState != fDevice.fNoZWriteStencilState) { fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(fDevice.fNoZWriteStencilState); fState.fCurrentDepthStencilState = fDevice.fNoZWriteStencilState; } - + int layerIndex = -1; // If mat's base layer is alpha'd, and we have > 3 TMU's factor // in the base layer's alpha. - if( (fMaxLayersAtOnce > 3) && mat->GetLayer(0)->GetTexture() && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha) ) - { + if ((fMaxLayersAtOnce > 3) && mat->GetLayer(0)->GetTexture() && (mat->GetLayer(0)->GetBlendFlags() & hsGMatState::kBlendAlpha)) { plLayerInterface* layer = mat->GetLayer(0); layerIndex = 0; - - // If the following conditions are met, it means that layer 1 is a better choice to // get the transparency from. The specific case we're looking for is vertex alpha // simulated by an invisible second layer alpha LUT (known as the alpha hack). - if( (layer->GetMiscFlags() & hsGMatState::kMiscBindNext) - && mat->GetLayer(1) - && !(mat->GetLayer(1)->GetMiscFlags() & hsGMatState::kMiscNoShadowAlpha) - && !(mat->GetLayer(1)->GetBlendFlags() & hsGMatState::kBlendNoTexAlpha) - && mat->GetLayer(1)->GetTexture() ) { - layer = mat->GetLayer(1); + if ((layer->GetMiscFlags() & hsGMatState::kMiscBindNext) && mat->GetLayer(1) && !(mat->GetLayer(1)->GetMiscFlags() & hsGMatState::kMiscNoShadowAlpha) && !(mat->GetLayer(1)->GetBlendFlags() & hsGMatState::kBlendNoTexAlpha) && mat->GetLayer(1)->GetTexture()) { + layer = mat->GetLayer(1); layerIndex = 1; } - + // Normal UVW source. uint32_t uvwSrc = layer->GetUVWSrc(); - - // Normal UVW source. + + // Normal UVW source. 
fCurrentRenderPassUniforms->uvTransforms[2].UVWSrc = uvwSrc; // MiscFlags to layer's misc flags matrix_float4x4 tXfm; hsMatrix2SIMD(layer->GetTransform(), &tXfm); fCurrentRenderPassUniforms->uvTransforms[2].transform = tXfm; } - + fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&layerIndex, sizeof(int), FragmentShaderArgumentShadowCastAlphaSrc); } @@ -4095,12 +3898,12 @@ void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) { fCurrLightingMethod = plSpan::kLiteShadow; - if( mat && mat->GetNumLayers() && mat->GetLayer(0) ) + if (mat && mat->GetNumLayers() && mat->GetLayer(0)) fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = mat->GetLayer(0)->GetOpacity(); else fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = 1.f; fCurrentRenderPassUniforms->diffuseCol.a = 1.f; - + fCurrentRenderPassUniforms->diffuseSrc = 1.0; fCurrentRenderPassUniforms->emissiveSrc = 1.0; fCurrentRenderPassUniforms->emissiveCol = 0.0; @@ -4114,10 +3917,9 @@ void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) // the surface. void plMetalPipeline::IDisableLightsForShadow() { - //FIXME: Planned for removal - but used by projections. New light code will obsolete. + // FIXME: Planned for removal - but used by projections. New light code will obsolete. int i; - for( i = 0; i < 8; i++ ) - { + for (i = 0; i < 8; i++) { IDisableLight(i); } fLights.count = 0; @@ -4128,7 +3930,7 @@ void plMetalPipeline::IDisableLightsForShadow() // shadow map onto the surface. 
void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) { - //D3DMATRIX tXfm; + // D3DMATRIX tXfm; hsMatrix44 c2w = GetCameraToWorld(); @@ -4137,24 +3939,24 @@ void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) // Set texture transform to slave's camera to texture transform plRenderTarget* renderTarg = (plRenderTarget*)slave->fPipeData; hsAssert(renderTarg, "Processing a slave that hasn't been rendered"); - if( !renderTarg ) + if (!renderTarg) return; plMetalTextureRef* ref = (plMetalTextureRef*)renderTarg->GetDeviceRef(); hsAssert(ref, "Shadow map ref should have been made when it was rendered"); - if( !ref ) + if (!ref) return; - hsRefCnt_SafeAssign( fLayerRef[0], ref ); + hsRefCnt_SafeAssign(fLayerRef[0], ref); fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(ref->fTexture, 16); plMetalShadowCastFragmentShaderArgumentBuffer uniforms; uniforms.pointLightCast = slave->fView.GetOrthogonal() ? false : true; fDevice.CurrentRenderCommandEncoder()->setFragmentBytes(&uniforms, sizeof(plMetalShadowCastFragmentShaderArgumentBuffer), FragmentShaderArgumentShadowCastUniforms); - - hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w; + + hsMatrix44 cameraToTexture = slave->fWorldToTexture * c2w; simd_float4x4 tXfm; hsMatrix2SIMD(cameraToTexture, &tXfm); - + fCurrentRenderPassUniforms->uvTransforms[0].UVWSrc = plLayerInterface::kUVWPosition; fCurrentRenderPassUniforms->uvTransforms[0].transform = tXfm; @@ -4162,10 +3964,9 @@ void plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) // Set the texture transform to slave's fRcvLUT hsMatrix44 cameraToLut = slave->fRcvLUT * c2w; hsMatrix2SIMD(cameraToLut, &tXfm); - + fCurrentRenderPassUniforms->uvTransforms[1].UVWSrc = plLayerInterface::kUVWPosition; fCurrentRenderPassUniforms->uvTransforms[1].transform = tXfm; - } /////////////////////////////////////////////////////////////////////////////// @@ -4178,7 +3979,7 @@ void 
plMetalPipeline::ISetupShadowSlaveTextures(plShadowSlave* slave) bool plMetalPipeline::IIsViewLeftHanded() { - return fView.GetViewTransform().GetOrthogonal() ^ ( fView.fLocalToWorldLeftHanded ^ fView.fWorldToCamLeftHanded ) ? true : false; + return fView.GetViewTransform().GetOrthogonal() ^ (fView.fLocalToWorldLeftHanded ^ fView.fWorldToCamLeftHanded) ? true : false; } //// ISetCullMode ///////////////////////////////////////////////////////////// @@ -4201,9 +4002,9 @@ plMetalDevice* plMetalPipeline::GetMetalDevice() //// Local Static Stuff /////////////////////////////////////////////////////// -//FIXME: CPU avatar stuff that should be evaluated once this moves onto the GPU. +// FIXME: CPU avatar stuff that should be evaluated once this moves onto the GPU. -template +template static inline void inlCopy(uint8_t*& src, uint8_t*& dst) { T* src_ptr = reinterpret_cast(src); @@ -4213,7 +4014,7 @@ static inline void inlCopy(uint8_t*& src, uint8_t*& dst) dst += sizeof(T); } -template +template static inline const uint8_t* inlExtract(const uint8_t* src, T* val) { const T* ptr = reinterpret_cast(src); @@ -4221,11 +4022,11 @@ static inline const uint8_t* inlExtract(const uint8_t* src, T* val) return reinterpret_cast(ptr); } -template<> +template <> inline const uint8_t* inlExtract(const uint8_t* src, hsPoint3* val) { const float* src_ptr = reinterpret_cast(src); - float* dst_ptr = reinterpret_cast(val); + float* dst_ptr = reinterpret_cast(val); *dst_ptr++ = *src_ptr++; *dst_ptr++ = *src_ptr++; *dst_ptr++ = *src_ptr++; @@ -4233,11 +4034,11 @@ inline const uint8_t* inlExtract(const uint8_t* src, hsPoint3* val) return reinterpret_cast(src_ptr); } -template<> +template <> inline const uint8_t* inlExtract(const uint8_t* src, hsVector3* val) { const float* src_ptr = reinterpret_cast(src); - float* dst_ptr = reinterpret_cast(val); + float* dst_ptr = reinterpret_cast(val); *dst_ptr++ = *src_ptr++; *dst_ptr++ = *src_ptr++; *dst_ptr++ = *src_ptr++; @@ -4245,13 +4046,13 @@ inline 
const uint8_t* inlExtract(const uint8_t* src, hsVector3* val) return reinterpret_cast(src_ptr); } -template +template static inline void inlSkip(uint8_t*& src) { src += sizeof(T) * N; } -template +template static inline uint8_t* inlStuff(uint8_t* dst, const T* val) { T* ptr = reinterpret_cast(dst); @@ -4287,18 +4088,18 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std: // lock the data buffer // First, figure out which buffers we need to blend. - const int kMaxBufferGroups = 20; - const int kMaxVertexBuffers = 20; + const int kMaxBufferGroups = 20; + const int kMaxVertexBuffers = 20; static char blendBuffers[kMaxBufferGroups][kMaxVertexBuffers]; memset(blendBuffers, 0, kMaxBufferGroups * kMaxVertexBuffers * sizeof(**blendBuffers)); hsAssert(kMaxBufferGroups >= drawable->GetNumBufferGroups(), "Bigger than we counted on num groups skin."); const std::vector& spans = drawable->GetSpanArray(); - int i; + int i; for (i = 0; i < visList.size(); i++) { if (blendBits.IsBitSet(visList[i])) { - const plVertexSpan &vSpan = *(plVertexSpan *)spans[visList[i]]; + const plVertexSpan& vSpan = *(plVertexSpan*)spans[visList[i]]; hsAssert(kMaxVertexBuffers > vSpan.fVBufferIdx, "Bigger than we counted on num buffers skin."); blendBuffers[vSpan.fGroupIdx][vSpan.fVBufferIdx] = 1; @@ -4311,12 +4112,9 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std: // uses it, set the matrix palette and and then do the blend for that span. // When we've done all the spans for a group/buffer, we unlock it and move on. int j; - for( i = 0; i < kMaxBufferGroups; i++ ) - { - for( j = 0; j < kMaxVertexBuffers; j++ ) - { - if( blendBuffers[i][j] ) - { + for (i = 0; i < kMaxBufferGroups; i++) { + for (j = 0; j < kMaxVertexBuffers; j++) { + if (blendBuffers[i][j]) { // Found one. Do the lock. 
plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)drawable->GetVertexRef(i, j); @@ -4335,15 +4133,15 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std: uint8_t* ptr = vRef->fOwner->GetVertBufferData(vRef->fIndex); ptr += span.fVStartIdx * vRef->fOwner->GetVertexSize(); - IBlendVertBuffer( (plSpan*)&span, - matrixPalette, span.fNumMatrices, - ptr, - vRef->fOwner->GetVertexFormat(), - vRef->fOwner->GetVertexSize(), - destPtr + span.fVStartIdx * vRef->fVertexSize, - vRef->fVertexSize, - span.fVLength, - span.fLocalUVWChans ); + IBlendVertBuffer((plSpan*)&span, + matrixPalette, span.fNumMatrices, + ptr, + vRef->fOwner->GetVertexFormat(), + vRef->fOwner->GetVertexSize(), + destPtr + span.fVStartIdx * vRef->fVertexSize, + vRef->fVertexSize, + span.fVLength, + span.fLocalUVWChans); vRef->SetDirty(true); } } @@ -4363,29 +4161,28 @@ bool plMetalPipeline::ISoftwareVertexBlend(plDrawableSpans* drawable, const std: return true; } - //// IBlendVertsIntoBuffer //////////////////////////////////////////////////// // Given a pointer into a buffer of verts that have blending data in the D3D // format, blends them into the destination buffer given without the blending // info. 
void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMatrices, - const uint8_t* src, uint8_t format, uint32_t srcStride, - uint8_t* dest, uint32_t destStride, uint32_t count, - uint16_t localUVWChans) + const uint8_t* src, uint8_t format, uint32_t srcStride, + uint8_t* dest, uint32_t destStride, uint32_t count, + uint16_t localUVWChans) { - float pt_buf[] = { 0.f, 0.f, 0.f, 1.f }; - float vec_buf[] = { 0.f, 0.f, 0.f, 0.f }; - hsPoint3* pt = reinterpret_cast(pt_buf); - hsVector3* vec = reinterpret_cast(vec_buf); + float pt_buf[] = {0.f, 0.f, 0.f, 1.f}; + float vec_buf[] = {0.f, 0.f, 0.f, 0.f}; + hsPoint3* pt = reinterpret_cast(pt_buf); + hsVector3* vec = reinterpret_cast(vec_buf); - uint32_t indices; - float weights[4]; + uint32_t indices; + float weights[4]; // Dropped support for localUVWChans at templatization of code hsAssert(localUVWChans == 0, "support for skinned UVWs dropped. reimplement me?"); const size_t uvChanSize = plGBufferGroup::CalcNumUVs(format) * sizeof(float) * 3; - uint8_t numWeights = (format & plGBufferGroup::kSkinWeightMask) >> 4; + uint8_t numWeights = (format & plGBufferGroup::kSkinWeightMask) >> 4; for (uint32_t i = 0; i < count; ++i) { // Extract data @@ -4405,20 +4202,20 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, src = inlExtract(src, vec); // Destination buffers (float4 for SSE alignment) - simd_float4 destNorm_buf = (simd_float4){ 0.f, 0.f, 0.f, 0.f }; - simd_float4 destPt_buf = (simd_float4){ 0.f, 0.f, 0.f, 1.f }; + simd_float4 destNorm_buf = (simd_float4){0.f, 0.f, 0.f, 0.f}; + simd_float4 destPt_buf = (simd_float4){0.f, 0.f, 0.f, 1.f}; simd_float4x4 simdMatrix; - + // Blend for (uint32_t j = 0; j < numWeights + 1; ++j) { hsMatrix2SIMD(matrixPalette[indices & 0xFF], &simdMatrix); if (weights[j]) { - //Note: This bit is different than GL/DirectX. It's using acclerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine. 
- destPt_buf += simd_mul(*(simd_float4 *)pt_buf, simdMatrix) * weights[j]; - destNorm_buf += simd_mul(*(simd_float4 *)vec_buf, simdMatrix) * weights[j]; + // Note: This bit is different than GL/DirectX. It's using acclerate so this is also accelerated on ARM through NEON or maybe even the Neural Engine. + destPt_buf += simd_mul(*(simd_float4*)pt_buf, simdMatrix) * weights[j]; + destNorm_buf += simd_mul(*(simd_float4*)vec_buf, simdMatrix) * weights[j]; } - //ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf); + // ISkinVertexSSE41(matrixPalette[indices & 0xFF], weights[j], pt_buf, destPt_buf, vec_buf, destNorm_buf); indices >>= 8; } // Probably don't really need to renormalize this. There errors are @@ -4431,11 +4228,11 @@ void plMetalPipeline::IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, // Jump past colors and UVws dest += sizeof(uint32_t) * 2 + uvChanSize; - src += sizeof(uint32_t) * 2 + uvChanSize; + src += sizeof(uint32_t) * 2 + uvChanSize; } } -//Resource checking +// Resource checking // CheckTextureRef ////////////////////////////////////////////////////// // Make sure the given layer's texture has background D3D resources allocated. @@ -4451,18 +4248,18 @@ void plMetalPipeline::CheckTextureRef(plLayerInterface* layer) void plMetalPipeline::CheckTextureRef(plBitmap* bitmap) { plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); - + if (!tRef) { tRef = static_cast(MakeTextureRef(bitmap)); } - + // If it's dirty, refill it. if (tRef->IsDirty()) { IReloadTexture(bitmap, tRef); } } -hsGDeviceRef *plMetalPipeline::MakeTextureRef(plBitmap* bitmap) +hsGDeviceRef* plMetalPipeline::MakeTextureRef(plBitmap* bitmap) { plMetalTextureRef* tRef = static_cast(bitmap->GetDeviceRef()); @@ -4481,12 +4278,12 @@ hsGDeviceRef *plMetalPipeline::MakeTextureRef(plBitmap* bitmap) // If it's dirty, refill it. 
if (tRef->IsDirty()) { - IReloadTexture( bitmap, tRef ); + IReloadTexture(bitmap, tRef); } return tRef; } -void plMetalPipeline::IReloadTexture( plBitmap* bitmap, plMetalTextureRef *ref ) +void plMetalPipeline::IReloadTexture(plBitmap* bitmap, plMetalTextureRef* ref) { plMipmap* mip = plMipmap::ConvertNoRef(bitmap); if (mip) { @@ -4532,12 +4329,9 @@ void plMetalPipeline::CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) // Might want to remove this assert, and replace it with a dirty check // if we have static buffers that change very seldom rather than never. hsAssert(!vRef->IsDirty(), "Non-volatile vertex buffers should never get dirty"); - } - else - { + } else { // Make sure we're going to be ready to fill it. - if (!vRef->fData && (vRef->fFormat != owner->GetVertexFormat())) - { + if (!vRef->fData && (vRef->fFormat != owner->GetVertexFormat())) { vRef->fData = new uint8_t[vRef->fCount * vRef->fVertexSize]; fDevice.FillVolatileVertexBufferRef(vRef, owner, idx); } @@ -4572,23 +4366,21 @@ void plMetalPipeline::CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) //// IGetBufferFormatSize ///////////////////////////////////////////////////// // Calculate the vertex stride from the given format. 
-uint32_t plMetalPipeline::IGetBufferFormatSize( uint8_t format ) const +uint32_t plMetalPipeline::IGetBufferFormatSize(uint8_t format) const { - uint32_t size = sizeof( float ) * 6 + sizeof( uint32_t ) * 2; // Position and normal, and two packed colors + uint32_t size = sizeof(float) * 6 + sizeof(uint32_t) * 2; // Position and normal, and two packed colors - - switch( format & plGBufferGroup::kSkinWeightMask ) - { + switch (format & plGBufferGroup::kSkinWeightMask) { case plGBufferGroup::kSkinNoWeights: break; case plGBufferGroup::kSkin1Weight: size += sizeof(float); break; default: - hsAssert( false, "Invalid skin weight value in IGetBufferFormatSize()" ); + hsAssert(false, "Invalid skin weight value in IGetBufferFormatSize()"); } - size += sizeof( float ) * 3 * plGBufferGroup::CalcNumUVs( format ); + size += sizeof(float) * 3 * plGBufferGroup::CalcNumUVs(format); return size; } @@ -4599,9 +4391,8 @@ void plMetalPipeline::plMetalPipelineCurrentState::Reset() fCurrentDepthStencilState = nullptr; fCurrentVertexBuffer = nullptr; fCurrentCullMode.reset(); - - for(auto& layer: layerStates) - { + + for (auto& layer : layerStates) { layer.clampFlag = hsGMatState::hsGMatClampFlags(-1); } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 27ee222bad..07af6545f1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -42,15 +42,15 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #ifndef _plMetalPipeline_inc_ #define _plMetalPipeline_inc_ -#include "plPipeline/pl3DPipeline.h" -#include "plPipeline/hsG3DDeviceSelector.h" -#include "plMetalDevice.h" -#include #include -#include "ShaderTypes.h" - +#include #include +#include "ShaderTypes.h" +#include "plMetalDevice.h" +#include "plPipeline/hsG3DDeviceSelector.h" +#include "plPipeline/pl3DPipeline.h" + class plIcicle; class plPlate; class plMetalMaterialShaderRef; @@ -64,7 +64,8 @@ const uint kMaxSkinWeightsPerMaterial = 3; class plMetalEnumerate { public: - plMetalEnumerate() { + plMetalEnumerate() + { hsG3DDeviceSelector::AddDeviceEnumerator(&plMetalEnumerate::Enumerate); } @@ -86,94 +87,92 @@ class plRenderPrimFunc class plMetalPipeline : public pl3DPipeline { public: - //The actual client should set this callback so we can retrieve drawables from the window server - std::function currentDrawableCallback; - //caching the frag function here so that the shader compiler can quickly access it - MTL::Function* fFragFunction; - -protected: + // The actual client should set this callback so we can retrieve drawables from the window server + std::function currentDrawableCallback; + // caching the frag function here so that the shader compiler can quickly access it + MTL::Function* fFragFunction; +protected: friend class plMetalDevice; friend class plMetalPlateManager; friend class plMetalMaterialShaderRef; friend class plRenderTriListFunc; friend class plMetalTextFont; - plMetalMaterialShaderRef* fMatRefList; - plMetalRenderTargetRef* fRenderTargetRefList; - + plMetalMaterialShaderRef* fMatRefList; + plMetalRenderTargetRef* fRenderTargetRefList; + public: - plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord *devMode); + plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord* devMode); virtual ~plMetalPipeline(); - + CLASSNAME_REGISTER(plMetalPipeline); GETINTERFACE_ANY(plMetalPipeline, plPipeline); - + /* All 
of these virtual methods are not implemented by pl3DPipeline and * need to be re-implemented here! */ /*** VIRTUAL METHODS ***/ - bool PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr=nullptr) override; - bool PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr=nullptr) override; - plTextFont* MakeTextFont(ST::string face, uint16_t size) override; - bool OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) override; - bool CloseAccess(plAccessSpan& acc) override; - void PushRenderRequest(plRenderRequest* req) override; - void PopRenderRequest(plRenderRequest* req) override; - void ClearRenderTarget(plDrawable* d) override; - void ClearRenderTarget(const hsColorRGBA* col = nullptr, const float* depth = nullptr) override; - hsGDeviceRef* MakeRenderTargetRef(plRenderTarget* owner) override; - bool BeginRender() override; - bool EndRender() override; - void RenderScreenElements() override; - bool IsFullScreen() const override; - void Resize(uint32_t width, uint32_t height) override; - void LoadResources() override; - bool SetGamma(float eR, float eG, float eB) override; - bool SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) override; - bool SetGamma10(const uint16_t *const tabR, const uint16_t *const tabG, const uint16_t *const tabB) override; - bool Supports10BitGamma() const override { return true; }; - bool CaptureScreen(plMipmap* dest, bool flipVertical = false, uint16_t desiredWidth = 0, uint16_t desiredHeight = 0) override; - plMipmap* ExtractMipMap(plRenderTarget* targ) override; - void GetSupportedDisplayModes(std::vector *res, int ColorDepth = 32 ) override; - int GetMaxAnisotropicSamples() override; - int GetMaxAntiAlias(int Width, int Height, int ColorDepth) override; - void ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync = false) override; - void 
RenderSpans(plDrawableSpans* ice, const std::vector& visList) override; - void ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W); - bool ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase); - bool IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef); - void IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, - hsGDeviceRef* ib, hsGMaterial* material, - uint32_t vStart, uint32_t vLength, - uint32_t iStart, uint32_t iLength); - void IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux); - void IRenderAuxSpans(const plSpan& span); - bool IHandleMaterialPass(hsGMaterial *material, uint32_t pass, const plSpan *currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); + bool PreRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr = nullptr) override; + bool PrepForRender(plDrawable* drawable, std::vector& visList, plVisMgr* visMgr = nullptr) override; + plTextFont* MakeTextFont(ST::string face, uint16_t size) override; + bool OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) override; + bool CloseAccess(plAccessSpan& acc) override; + void PushRenderRequest(plRenderRequest* req) override; + void PopRenderRequest(plRenderRequest* req) override; + void ClearRenderTarget(plDrawable* d) override; + void ClearRenderTarget(const hsColorRGBA* col = nullptr, const float* depth = nullptr) override; + hsGDeviceRef* MakeRenderTargetRef(plRenderTarget* owner) override; + bool BeginRender() override; + bool EndRender() override; + void RenderScreenElements() override; + bool IsFullScreen() const override; + void Resize(uint32_t width, uint32_t height) override; + void LoadResources() override; + bool SetGamma(float eR, float eG, float eB) override; + bool SetGamma(const uint16_t* const tabR, const uint16_t* const tabG, const uint16_t* const tabB) override; + bool SetGamma10(const uint16_t* const tabR, const 
uint16_t* const tabG, const uint16_t* const tabB) override; + bool Supports10BitGamma() const override { return true; }; + bool CaptureScreen(plMipmap* dest, bool flipVertical = false, uint16_t desiredWidth = 0, uint16_t desiredHeight = 0) override; + plMipmap* ExtractMipMap(plRenderTarget* targ) override; + void GetSupportedDisplayModes(std::vector* res, int ColorDepth = 32) override; + int GetMaxAnisotropicSamples() override; + int GetMaxAntiAlias(int Width, int Height, int ColorDepth) override; + void ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync = false) override; + void RenderSpans(plDrawableSpans* ice, const std::vector& visList) override; + void ISetupTransforms(plDrawableSpans* drawable, const plSpan& span, hsMatrix44& lastL2W); + bool ICheckDynBuffers(plDrawableSpans* drawable, plGBufferGroup* group, const plSpan* spanBase); + bool IRefreshDynVertices(plGBufferGroup* group, plMetalVertexBufferRef* vRef); + void IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, + hsGDeviceRef* ib, hsGMaterial* material, + uint32_t vStart, uint32_t vLength, + uint32_t iStart, uint32_t iLength); + void IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux); + void IRenderAuxSpans(const plSpan& span); + bool IHandleMaterialPass(hsGMaterial* material, uint32_t pass, const plSpan* currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); plMetalDevice* GetMetalDevice(); - + // Create and/or Refresh geometry buffers - void CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; - void CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) override; - void CheckTextureRef(plLayerInterface* lay) override; - void CheckTextureRef(plBitmap* bitmap); - hsGDeviceRef *MakeTextureRef(plBitmap* bitmap); - void IReloadTexture( plBitmap* bitmap, plMetalTextureRef *ref ); - - uint32_t IGetBufferFormatSize( uint8_t format ) const; - + void 
CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; + void CheckIndexBufferRef(plGBufferGroup* owner, uint32_t idx) override; + void CheckTextureRef(plLayerInterface* lay) override; + void CheckTextureRef(plBitmap* bitmap); + hsGDeviceRef* MakeTextureRef(plBitmap* bitmap); + void IReloadTexture(plBitmap* bitmap, plMetalTextureRef* ref); + + uint32_t IGetBufferFormatSize(uint8_t format) const; + plRenderTarget* PopRenderTarget() override; - - MTL::PixelFormat GetFramebufferFormat() { return fDevice.GetFramebufferFormat(); }; - void SetFramebufferFormat(MTL::PixelFormat format) { fDevice.SetFramebufferFormat(format); }; - + + MTL::PixelFormat GetFramebufferFormat() { return fDevice.GetFramebufferFormat(); }; + void SetFramebufferFormat(MTL::PixelFormat format) { fDevice.SetFramebufferFormat(format); }; + private: - - VertexUniforms* fCurrentRenderPassUniforms; - + VertexUniforms* fCurrentRenderPassUniforms; + void FindFragFunction(); - + void ISelectLights(const plSpan* span, plMetalMaterialShaderRef* mRef, bool proj = false); void IEnableLight(size_t i, plLightInfo* light); void IDisableLight(size_t i); @@ -181,110 +180,114 @@ class plMetalPipeline : public pl3DPipeline void ICalcLighting(plMetalMaterialShaderRef* mRef, const plLayerInterface* currLayer, const plSpan* currSpan); void IHandleBlendMode(hsGMatState flags); void IHandleZMode(hsGMatState flags); - + void IDrawPlate(plPlate* plate); void IPreprocessAvatarTextures(); void IDrawClothingQuad(float x, float y, float w, float h, - float uOff, float vOff, plMipmap *tex); + float uOff, float vOff, plMipmap* tex); void IClearShadowSlaves(); - + void ICreateDeviceObjects(); void IReleaseDynDeviceObjects(); bool ICreateDynDeviceObjects(); void IReleaseDynamicBuffers(); void IReleaseDeviceObjects(); - + bool IIsViewLeftHanded(); void ISetCullMode(bool flip = false); - + plLayerInterface* IPushOverBaseLayer(plLayerInterface* li); plLayerInterface* IPopOverBaseLayer(plLayerInterface* li); 
plLayerInterface* IPushOverAllLayer(plLayerInterface* li); plLayerInterface* IPopOverAllLayer(plLayerInterface* li); - - void IPushPiggyBacks(hsGMaterial* mat); - void IPopPiggyBacks(); - void IPushProjPiggyBack(plLayerInterface* li); - void IPopProjPiggyBacks(); + + void IPushPiggyBacks(hsGMaterial* mat); + void IPopPiggyBacks(); + void IPushProjPiggyBack(plLayerInterface* li); + void IPopProjPiggyBacks(); size_t ISetNumActivePiggyBacks(); - bool ICheckAuxBuffers(const plAuxSpan* span); - + bool ICheckAuxBuffers(const plAuxSpan* span); + void ISetPipeConsts(plShader* shader); - bool ISetShaders(const plMetalVertexBufferRef * vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader); - + bool ISetShaders(const plMetalVertexBufferRef* vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader); + bool ISoftwareVertexBlend(plDrawableSpans* drawable, const std::vector& visList); void IBlendVertBuffer(plSpan* span, hsMatrix44* matrixPalette, int numMatrices, - const uint8_t* src, uint8_t format, uint32_t srcStride, - uint8_t* dest, uint32_t destStride, uint32_t count, - uint16_t localUVWChans); - - plMetalVertexShader* fVShaderRefList; - plMetalFragmentShader* fPShaderRefList; - bool IPrepShadowCaster(const plShadowCaster* caster); - bool IRenderShadowCaster(plShadowSlave* slave); - void IPreprocessShadows(); - bool IPushShadowCastState(plShadowSlave* slave); - plRenderTarget* IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho); - bool IPopShadowCastState(plShadowSlave* slave); - void IResetRenderTargetPools(); - void IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span); - plMetalTextureRef* fULutTextureRef; - void IMakeRenderTargetPools(); - hsGDeviceRef* SharedRenderTargetRef(plRenderTarget* share, plRenderTarget *owner); - void IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef *vRef); - void 
ISetupShadowRcvTextureStages(hsGMaterial* mat); - void ISetupShadowSlaveTextures(plShadowSlave* slave); - void ISetShadowLightState(hsGMaterial* mat); - void ISetupShadowState(plShadowSlave* slave, plShadowState& shadowState); - void IDisableLightsForShadow(); - void IReleaseRenderTargetPools(); - void IRenderProjectionEach(const plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); - void IRenderProjections(const plRenderPrimFunc& render, const plMetalVertexBufferRef* vRef); - void IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef); - - void ISetLayer( uint32_t lay ); - + const uint8_t* src, uint8_t format, uint32_t srcStride, + uint8_t* dest, uint32_t destStride, uint32_t count, + uint16_t localUVWChans); + + plMetalVertexShader* fVShaderRefList; + plMetalFragmentShader* fPShaderRefList; + bool IPrepShadowCaster(const plShadowCaster* caster); + bool IRenderShadowCaster(plShadowSlave* slave); + void IPreprocessShadows(); + bool IPushShadowCastState(plShadowSlave* slave); + plRenderTarget* IFindRenderTarget(uint32_t& width, uint32_t& height, bool ortho); + bool IPopShadowCastState(plShadowSlave* slave); + void IResetRenderTargetPools(); + void IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSpans* drawable, const plIcicle& span); + plMetalTextureRef* fULutTextureRef; + void IMakeRenderTargetPools(); + hsGDeviceRef* SharedRenderTargetRef(plRenderTarget* share, plRenderTarget* owner); + void IRenderShadowsOntoSpan(const plRenderPrimFunc& render, const plSpan* span, hsGMaterial* mat, plMetalVertexBufferRef* vRef); + void ISetupShadowRcvTextureStages(hsGMaterial* mat); + void ISetupShadowSlaveTextures(plShadowSlave* slave); + void ISetShadowLightState(hsGMaterial* mat); + void ISetupShadowState(plShadowSlave* slave, plShadowState& shadowState); + void IDisableLightsForShadow(); + void IReleaseRenderTargetPools(); + void IRenderProjectionEach(const 
plRenderPrimFunc& render, hsGMaterial* material, int iPass, const plSpan& span, const plMetalVertexBufferRef* vRef); + void IRenderProjections(const plRenderPrimFunc& render, const plMetalVertexBufferRef* vRef); + void IRenderProjection(const plRenderPrimFunc& render, plLightInfo* li, const plMetalVertexBufferRef* vRef); + + void ISetLayer(uint32_t lay); + // Shadows - std::vector fRenderTargetPool512; - std::vector fRenderTargetPool256; - std::vector fRenderTargetPool128; - std::vector fRenderTargetPool64; - std::vector fRenderTargetPool32; - enum { kMaxRenderTargetNext = 10 }; - uint32_t fRenderTargetNext[kMaxRenderTargetNext]; - - std::vector fProjEach; - std::vector fProjAll; - + std::vector fRenderTargetPool512; + std::vector fRenderTargetPool256; + std::vector fRenderTargetPool128; + std::vector fRenderTargetPool64; + std::vector fRenderTargetPool32; + enum + { + kMaxRenderTargetNext = 10 + }; + uint32_t fRenderTargetNext[kMaxRenderTargetNext]; + + std::vector fProjEach; + std::vector fProjAll; + uint32_t fCurrRenderLayer; - - void PushCurrentLightSources(); - void PopCurrentLightSources(); - plMetalLights fLights; - std::vector fLightSourceStack; - + + void PushCurrentLightSources(); + void PopCurrentLightSources(); + plMetalLights fLights; + std::vector fLightSourceStack; + static plMetalEnumerate enumerator; - - plTextFont* fTextFontRefList; - + + plTextFont* fTextFontRefList; + NS::AutoreleasePool* fCurrentPool; - + /// Describes the state for the "fixed function" shader. 
- struct plMetalPipelineCurrentState { - + struct plMetalPipelineCurrentState + { // notes state of a given layer for a draw pass // index is the offset from the curent root layer // for the draw pass, not the overall index in the // material - struct plMetalPipelineLayerState { + struct plMetalPipelineLayerState + { hsGMatState::hsGMatClampFlags clampFlag; } layerStates[8]; - - std::optional fCurrentCullMode; - const MTL::RenderPipelineState* fCurrentPipelineState; - MTL::Buffer* fCurrentVertexBuffer; - MTL::DepthStencilState* fCurrentDepthStencilState; - + + std::optional fCurrentCullMode; + const MTL::RenderPipelineState* fCurrentPipelineState; + MTL::Buffer* fCurrentVertexBuffer; + MTL::DepthStencilState* fCurrentDepthStencilState; + void Reset(); } fState; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 48a6550bf5..b0d79bf911 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -41,28 +41,29 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com *==LICENSE==*/ #include "plMetalPipelineState.h" + #include "plDrawable/plGBufferGroup.h" -#include "plSurface/plLayerInterface.h" -#include "plSurface/hsGMaterial.h" -#include "plMetalDevice.h" -#include "plGImage/plMipmap.h" #include "plGImage/plCubicEnvironmap.h" -#include "plPipeline/plCubicRenderTarget.h" -#include "plPipeline/plRenderTarget.h" +#include "plGImage/plMipmap.h" #include "plMetalDevice.h" #include "plMetalMaterialShaderRef.h" +#include "plPipeline/plCubicRenderTarget.h" +#include "plPipeline/plRenderTarget.h" +#include "plSurface/hsGMaterial.h" +#include "plSurface/plLayerInterface.h" -size_t plMetalPipelineState::GetHash() const { +size_t plMetalPipelineState::GetHash() const +{ return std::hash()(GetID()); } plMetalPipelineState::plMetalPipelineState(plMetalDevice* device) -: fDevice(device) + : fDevice(device) { } plMetalRenderSpanPipelineState::plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) -: plMetalPipelineState(device) + : plMetalPipelineState(device) { fNumUVs = plGBufferGroup::CalcNumUVs(vRef->fFormat); fNumWeights = (vRef->fFormat & plGBufferGroup::kSkinWeightMask) >> 4; @@ -76,7 +77,8 @@ void plMetalRenderSpanPipelineState::GetFunctionConstants(MTL::FunctionConstantV constants->setConstantValue(&fNumWeights, MTL::DataTypeUChar, FunctionConstantNumWeights); } -size_t plMetalRenderSpanPipelineState::GetHash() const { +size_t plMetalRenderSpanPipelineState::GetHash() const +{ std::size_t h1 = std::hash()(fNumUVs); std::size_t h2 = std::hash()(fNumWeights); std::size_t h3 = std::hash()(fHasSkinIndices); @@ -84,17 +86,19 @@ size_t plMetalRenderSpanPipelineState::GetHash() const { return h1 ^ h2 ^ h3 ^ plMetalPipelineState::GetHash(); } -plMetalDevice::plMetalLinkedPipeline* plMetalPipelineState::GetRenderPipelineState() { +plMetalDevice::plMetalLinkedPipeline* plMetalPipelineState::GetRenderPipelineState() +{ return fDevice->PipelineState(this); } -void 
plMetalPipelineState::PrewarmRenderPipelineState() { +void plMetalPipelineState::PrewarmRenderPipelineState() +{ fDevice->PrewarmPipelineStateFor(this); } - -plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription &description) -: plMetalRenderSpanPipelineState(device, vRef) { +plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description) + : plMetalRenderSpanPipelineState(device, vRef) +{ fFragmentShaderDescription = description; fFragmentShaderDescription.CacheHash(); } @@ -108,84 +112,86 @@ void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstan constants->setConstantValues(&fFragmentShaderDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); } -size_t plMetalMaterialPassPipelineState::GetHash() const { +size_t plMetalMaterialPassPipelineState::GetHash() const +{ std::size_t value = plMetalRenderSpanPipelineState::GetHash(); value ^= fFragmentShaderDescription.GetHash(); return value; } -void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { +void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) +{ int vertOffset = 0; int skinWeightOffset = vertOffset + (sizeof(float) * 3); - if(this->fHasSkinIndices) { + if (this->fHasSkinIndices) { skinWeightOffset += sizeof(uint32_t); } int normOffset = skinWeightOffset + (sizeof(float) * this->fNumWeights); int colorOffset = normOffset + (sizeof(float) * 3); int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); int stride = baseUvOffset + (sizeof(float) * 3 * this->fNumUVs); - + vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); 
vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); vertexDescriptor->attributes()->object(VertexAttributePosition)->setOffset(vertOffset); - + vertexDescriptor->attributes()->object(VertexAttributeNormal)->setFormat(MTL::VertexFormatFloat3); vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); - - if(this->fNumWeights > 0) { + + if (this->fNumWeights > 0) { int weightOneOffset = skinWeightOffset; - + vertexDescriptor->attributes()->object(VertexAttributeWeights)->setFormat(MTL::VertexFormatFloat); vertexDescriptor->attributes()->object(VertexAttributeWeights)->setBufferIndex(0); vertexDescriptor->attributes()->object(VertexAttributeWeights)->setOffset(weightOneOffset); } - - for(int i=0; ifNumUVs; i++) { - vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setFormat(MTL::VertexFormatFloat3); - vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setBufferIndex(0); - vertexDescriptor->attributes()->object(VertexAttributeTexcoord+i)->setOffset(baseUvOffset + (i * sizeof(float) * 3)); + + for (int i = 0; i < this->fNumUVs; i++) { + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setFormat(MTL::VertexFormatFloat3); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setBufferIndex(0); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setOffset(baseUvOffset + (i * sizeof(float) * 3)); } - + vertexDescriptor->attributes()->object(VertexAttributeColor)->setFormat(MTL::VertexFormatUChar4); vertexDescriptor->attributes()->object(VertexAttributeColor)->setBufferIndex(0); vertexDescriptor->attributes()->object(VertexAttributeColor)->setOffset(colorOffset); - + vertexDescriptor->layouts()->object(VertexAttributePosition)->setStride(stride); } -void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode, 
MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor* descriptor) { if (blendMode & hsGMatState::kBlendNoColor) { - //printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); + // printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); return; } - switch (blendMode & hsGMatState::kBlendMask) - { + switch (blendMode & hsGMatState::kBlendMask) { // Detail is just a special case of alpha, handled in construction of the texture // mip chain by making higher levels of the chain more transparent. case hsGMatState::kBlendDetail: case hsGMatState::kBlendAlpha: if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { - //printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); } else { - //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); - descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha);; + // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); + ; } descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); } else { if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { - //printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); } else { - //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); + // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); 
descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); } descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); @@ -195,13 +201,13 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Multiply the final color onto the frame buffer. case hsGMatState::kBlendMult: if (blendMode & hsGMatState::kBlendInvertFinalColor) { - //printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); + // printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceColor); } else { - //printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); + // printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceColor); @@ -211,7 +217,7 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Add final color to FB. case hsGMatState::kBlendAdd: - //printf("glBlendFunc(GL_ONE, GL_ONE);\n"); + // printf("glBlendFunc(GL_ONE, GL_ONE);\n"); descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); @@ -219,7 +225,7 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Multiply final color by FB color and add it into the FB. 
case hsGMatState::kBlendMADD: - //printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); + // printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); break; @@ -227,13 +233,13 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Final color times final alpha, added into the FB. case hsGMatState::kBlendAddColorTimesAlpha: if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { - //printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); + // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); } else { - //printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); + // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); @@ -243,24 +249,23 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Overwrite final color onto FB case 0: - //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + // printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); descriptor->setAlphaBlendOperation(MTL::BlendOperationAdd); - //printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); + // printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); - + 
/*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero);*/ break; - default: - { - /*hsAssert(false, "Too many blend modes specified in material"); + default: { + /*hsAssert(false, "Too many blend modes specified in material"); plLayer* lay = plLayer::ConvertNoRef(fCurrMaterial->GetLayer(fCurrLayerIdx)->BottomOfStack()); if( lay ) { @@ -273,50 +278,56 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode lay->SetBlendFlags((lay->GetBlendFlags() & ~hsGMatState::kBlendMask) | hsGMatState::kBlendAdd); } }*/ - } - break; + } break; } } -MTL::Function* plMetalMaterialPassPipelineState::GetVertexFunction(MTL::Library* library) { - NS::Error* error = nullptr; +MTL::Function* plMetalMaterialPassPipelineState::GetVertexFunction(MTL::Library* library) +{ + NS::Error* error = nullptr; MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); this->GetFunctionConstants(constants); MTL::Function* function = library->newFunction( - NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), - MakeFunctionConstants(), - &error - )->autorelease(); + NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + &error) + ->autorelease(); return function; } -MTL::Function* plMetalMaterialPassPipelineState::GetFragmentFunction(MTL::Library* library) { +MTL::Function* plMetalMaterialPassPipelineState::GetFragmentFunction(MTL::Library* library) +{ return library->newFunction( - NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), - MakeFunctionConstants(), - (NS::Error **)NULL - )->autorelease(); + 
NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error**)NULL) + ->autorelease(); } -plMetalMaterialPassPipelineState::~plMetalMaterialPassPipelineState() { +plMetalMaterialPassPipelineState::~plMetalMaterialPassPipelineState() +{ } -const NS::String* plMetalMaterialPassPipelineState::GetDescription() { +const NS::String* plMetalMaterialPassPipelineState::GetDescription() +{ return NS::MakeConstantString("Material Pipeline"); } -void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { +void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) +{ uint32_t blendMode = fFragmentShaderDescription.blendModes[0]; ConfigureBlendMode(blendMode, descriptor); } -void plMetalFragmentShaderDescription::Populate(const plLayerInterface* layPtr, const uint8_t index) { +void plMetalFragmentShaderDescription::Populate(const plLayerInterface* layPtr, const uint8_t index) +{ blendModes[index] = layPtr->GetBlendFlags(); miscFlags[index] = layPtr->GetMiscFlags(); PopulateTextureInfo(layPtr, index); } -void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index) { +void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index) +{ plBitmap* texture = layPtr->GetTexture(); if (texture != nullptr) { if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { @@ -326,34 +337,37 @@ void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterfac } else { passTypes[index] = PassTypeColor; } - + } else { passTypes[index] = PassTypeColor; } - } -bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState &p) const { +bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState& p) const +{ return 
plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fFragmentShaderDescription == this->fFragmentShaderDescription; } -MTL::Function* plMetalRenderShadowPipelineState::GetVertexFunction(MTL::Library* library) { +MTL::Function* plMetalRenderShadowPipelineState::GetVertexFunction(MTL::Library* library) +{ return library->newFunction( - NS::String::string("shadowCastVertexShader", NS::ASCIIStringEncoding), - MakeFunctionConstants(), - (NS::Error **)NULL - )->autorelease(); + NS::String::string("shadowCastVertexShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error**)NULL) + ->autorelease(); } -MTL::Function* plMetalRenderShadowPipelineState::GetFragmentFunction(MTL::Library* library) { +MTL::Function* plMetalRenderShadowPipelineState::GetFragmentFunction(MTL::Library* library) +{ return library->newFunction( - NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), - MakeFunctionConstants(), - (NS::Error **)NULL - )->autorelease(); + NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), + MakeFunctionConstants(), + (NS::Error**)NULL) + ->autorelease(); } -void plMetalRenderShadowPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { +void plMetalRenderShadowPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) +{ descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); @@ -362,78 +376,72 @@ void plMetalRenderShadowPipelineState::ConfigureBlend(MTL::RenderPipelineColorAt const MTL::Function* plMetalRenderShadowCasterPipelineState::GetVertexFunction(MTL::Library* library) { - NS::Error* error = nullptr; + NS::Error* error = nullptr; MTL::Function* function = library->newFunction( - NS::MakeConstantString("shadowVertexShader"), - MakeFunctionConstants(), - &error - )->autorelease(); + 
NS::MakeConstantString("shadowVertexShader"), + MakeFunctionConstants(), + &error) + ->autorelease(); return function; } const MTL::Function* plMetalRenderShadowCasterPipelineState::GetFragmentFunction(MTL::Library* library) { - NS::Error* error = nullptr; + NS::Error* error = nullptr; MTL::Function* function = library->newFunction( - NS::MakeConstantString("shadowFragmentShader"), - MakeFunctionConstants(), - &error - )->autorelease(); + NS::MakeConstantString("shadowFragmentShader"), + MakeFunctionConstants(), + &error) + ->autorelease(); return function; } -const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL::Library *library) { +const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL::Library* library) +{ MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); - MTL::Function* vertFunction; - switch(fVertexShaderID) { + MTL::Function* vertFunction; + switch (fVertexShaderID) { case plShaderID::vs_WaveFixedFin7: vertFunction = library->newFunction( - NS::String::string("vs_WaveFixedFin7", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_WaveFixedFin7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::vs_CompCosines: vertFunction = library->newFunction( - NS::String::string("vs_CompCosines", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_CompCosines", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::vs_BiasNormals: vertFunction = library->newFunction( - NS::String::string("vs_BiasNormals", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_BiasNormals", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::vs_GrassShader: vertFunction = library->newFunction( - NS::String::string("vs_GrassShader", 
NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_GrassShader", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::vs_WaveDecEnv_7: vertFunction = library->newFunction( - NS::String::string("vs_WaveDecEnv_7", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_WaveDecEnv_7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::vs_WaveDec1Lay_7: vertFunction = library->newFunction( - NS::String::string("vs_WaveDec1Lay_7", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_WaveDec1Lay_7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::vs_WaveRip7: vertFunction = library->newFunction( - NS::String::string("vs_WaveRip7", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("vs_WaveRip7", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; default: hsAssert(0, "unknown shader requested"); @@ -441,58 +449,52 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL: return vertFunction; } -const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MTL::Library *library) { +const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MTL::Library* library) +{ MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); - MTL::Function* fragFunction; - switch(fFragmentShaderID) { + MTL::Function* fragFunction; + switch (fFragmentShaderID) { case plShaderID::ps_WaveFixed: fragFunction = library->newFunction( - NS::String::string("ps_WaveFixed", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_WaveFixed", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case 
plShaderID::ps_MoreCosines: fragFunction = library->newFunction( - NS::String::string("ps_CompCosines", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_CompCosines", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::ps_BiasNormals: fragFunction = library->newFunction( - NS::String::string("ps_BiasNormals", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_BiasNormals", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::ps_GrassShader: fragFunction = library->newFunction( - NS::String::string("ps_GrassShader", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_GrassShader", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::ps_WaveDecEnv: fragFunction = library->newFunction( - NS::String::string("ps_WaveDecEnv", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_WaveDecEnv", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::ps_CbaseAbase: fragFunction = library->newFunction( - NS::String::string("ps_CbaseAbase", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_CbaseAbase", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; case plShaderID::ps_WaveRip: fragFunction = library->newFunction( - NS::String::string("ps_WaveRip", NS::ASCIIStringEncoding), - functionConstants, - (NS::Error **)nullptr - ); + NS::String::string("ps_WaveRip", NS::ASCIIStringEncoding), + functionConstants, + (NS::Error**)nullptr); break; default: hsAssert(0, "unknown shader requested"); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 
73e867d76c..21d64fccb3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -44,119 +44,132 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #define plMetalPipelineState_hpp #include + #include #include "plMetalDevice.h" #include "plSurface/plShaderTable.h" -class plMetalPipelineState { +class plMetalPipelineState +{ public: plMetalPipelineState(plMetalDevice* device); plMetalDevice::plMetalLinkedPipeline* GetRenderPipelineState(); - void PrewarmRenderPipelineState(); - bool operator==(const plMetalPipelineState& p) const { + void PrewarmRenderPipelineState(); + bool operator==(const plMetalPipelineState& p) const + { if ((&p)->GetID() != this->GetID()) { return false; } else { return IsEqual(p); } } - virtual size_t GetHash() const; - virtual bool IsEqual(const plMetalPipelineState &p) const = 0; - virtual uint16_t GetID() const { return 0; }; + virtual size_t GetHash() const; + virtual bool IsEqual(const plMetalPipelineState& p) const = 0; + virtual uint16_t GetID() const { return 0; }; virtual plMetalPipelineState* Clone() = 0; - + // - virtual const MTL::Function* GetVertexFunction(MTL::Library* library) = 0; - virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) = 0; - virtual const NS::String* GetDescription() = 0; - + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) = 0; + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) = 0; + virtual const NS::String* GetDescription() = 0; + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) = 0; virtual ~plMetalPipelineState() = default; + protected: - plMetalDevice* fDevice; - virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const = 0; - MTL::FunctionConstantValues* MakeFunctionConstants() { + plMetalDevice* fDevice; 
+ virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const = 0; + MTL::FunctionConstantValues* MakeFunctionConstants() + { MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); this->GetFunctionConstants(constants); return constants; } }; -class plMetalRenderSpanPipelineState: public plMetalPipelineState { +class plMetalRenderSpanPipelineState : public plMetalPipelineState +{ public: plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef); - virtual bool IsEqual(const plMetalPipelineState &p) const { - const plMetalRenderSpanPipelineState *renderSpanPipelineSate = static_cast(&p); - if( !renderSpanPipelineSate ) { + virtual bool IsEqual(const plMetalPipelineState& p) const + { + const plMetalRenderSpanPipelineState* renderSpanPipelineSate = static_cast(&p); + if (!renderSpanPipelineSate) { return false; } return renderSpanPipelineSate->fNumUVs == fNumUVs && renderSpanPipelineSate->fNumWeights == fNumWeights && renderSpanPipelineSate->fHasSkinIndices == fHasSkinIndices; }; virtual size_t GetHash() const; - - virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; + + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor); - - void ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor *descriptor); + + void ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor* descriptor); + protected: - uint8_t fNumUVs; - uint8_t fNumWeights; - bool fHasSkinIndices; - virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const; - MTL::FunctionConstantValues* MakeFunctionConstants() { + uint8_t fNumUVs; + uint8_t fNumWeights; + bool fHasSkinIndices; + virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const; + MTL::FunctionConstantValues* 
MakeFunctionConstants() + { MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); this->GetFunctionConstants(constants); return constants; } }; -struct plMetalFragmentShaderDescription { - uint8_t passTypes[8]; - uint32_t blendModes[8]; - uint32_t miscFlags[8]; - uint8_t numLayers; - - size_t hash; - - bool operator==(const plMetalFragmentShaderDescription &p) const { +struct plMetalFragmentShaderDescription +{ + uint8_t passTypes[8]; + uint32_t blendModes[8]; + uint32_t miscFlags[8]; + uint8_t numLayers; + + size_t hash; + + bool operator==(const plMetalFragmentShaderDescription& p) const + { bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0; return match; } - - void CacheHash() { - if(!hash) + + void CacheHash() + { + if (!hash) hash = GetHash(); } - - size_t GetHash() const { - if(hash) + + size_t GetHash() const + { + if (hash) return hash; - + std::size_t value = std::hash()(numLayers); value ^= std::hash()(numLayers); - - for(int i=0;i<8;i++){ - value ^= std::hash()( blendModes[i] ); + + for (int i = 0; i < 8; i++) { + value ^= std::hash()(blendModes[i]); } - - for(int i=0;i<8;i++){ - value ^= std::hash()( miscFlags[i] ); + + for (int i = 0; i < 8; i++) { + value ^= std::hash()(miscFlags[i]); } - - for(int i=0;i<8;i++){ - value ^= std::hash()( passTypes[i] ); + + for (int i = 0; i < 8; i++) { + value ^= std::hash()(passTypes[i]); } - + return value; } - + void Populate(const plLayerInterface* layPtr, const uint8_t index); void PopulateTextureInfo(const plLayerInterface* layPtr, const uint8_t index); }; -template<> +template <> struct std::hash { std::size_t operator()(plMetalFragmentShaderDescription const& s) const noexcept @@ -165,123 +178,139 @@ struct std::hash } }; -class plMetalMaterialPassPipelineState: public plMetalRenderSpanPipelineState { 
+class plMetalMaterialPassPipelineState : public plMetalRenderSpanPipelineState +{ public: - plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, const plMetalFragmentShaderDescription &description); + plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description); virtual size_t GetHash() const override; - MTL::Function* GetVertexFunction(MTL::Library* library) override; - MTL::Function* GetFragmentFunction(MTL::Library* library) override; - - virtual const NS::String* GetDescription() override; - - void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; - - virtual bool IsEqual(const plMetalPipelineState &p) const override; - + MTL::Function* GetVertexFunction(MTL::Library* library) override; + MTL::Function* GetFragmentFunction(MTL::Library* library) override; + + virtual const NS::String* GetDescription() override; + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; + + virtual bool IsEqual(const plMetalPipelineState& p) const override; + virtual uint16_t GetID() const override { return 1; }; - - virtual plMetalPipelineState* Clone() override { + + virtual plMetalPipelineState* Clone() override + { return new plMetalMaterialPassPipelineState(*this); } ~plMetalMaterialPassPipelineState(); virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const override; + protected: plMetalFragmentShaderDescription fFragmentShaderDescription; }; -class plMetalRenderShadowCasterPipelineState: public plMetalRenderSpanPipelineState { +class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineState +{ public: plMetalRenderShadowCasterPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef) - : plMetalRenderSpanPipelineState(device, vRef) { - + : plMetalRenderSpanPipelineState(device, vRef) + { } - const MTL::Function* 
GetVertexFunction(MTL::Library* library) override; - const MTL::Function* GetFragmentFunction(MTL::Library* library) override; - - const NS::String* GetDescription() override { + const MTL::Function* GetVertexFunction(MTL::Library* library) override; + const MTL::Function* GetFragmentFunction(MTL::Library* library) override; + + const NS::String* GetDescription() override + { return NS::MakeConstantString("Shadow Caster Pipeline"); }; - - void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override { + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override + { descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); }; virtual uint16_t GetID() const override { return 2; }; - - - virtual plMetalPipelineState* Clone() override { + + virtual plMetalPipelineState* Clone() override + { return new plMetalRenderShadowCasterPipelineState(*this); } - }; -class plMetalRenderShadowPipelineState: public plMetalMaterialPassPipelineState { +class plMetalRenderShadowPipelineState : public plMetalMaterialPassPipelineState +{ public: - plMetalRenderShadowPipelineState(plMetalDevice* device, plMetalVertexBufferRef *vRef, const plMetalFragmentShaderDescription &description) - : plMetalMaterialPassPipelineState(device, vRef, description) { + plMetalRenderShadowPipelineState(plMetalDevice* device, plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description) + : plMetalMaterialPassPipelineState(device, vRef, description) + { } - - const NS::String* GetDescription() override { + + const NS::String* GetDescription() override + { return NS::MakeConstantString("Shadow Span Render Pipeline"); }; - MTL::Function* GetVertexFunction(MTL::Library* library) override; - MTL::Function* GetFragmentFunction(MTL::Library* library) override; - void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; - virtual uint16_t 
GetID() const override { return 3; } ; - - virtual plMetalPipelineState* Clone() override { + MTL::Function* GetVertexFunction(MTL::Library* library) override; + MTL::Function* GetFragmentFunction(MTL::Library* library) override; + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; + virtual uint16_t GetID() const override { return 3; }; + + virtual plMetalPipelineState* Clone() override + { return new plMetalRenderShadowPipelineState(*this); } }; -class plMetalDynamicMaterialPipelineState: public plMetalRenderSpanPipelineState { +class plMetalDynamicMaterialPipelineState : public plMetalRenderSpanPipelineState +{ public: - plMetalDynamicMaterialPipelineState(plMetalDevice* device, const plMetalVertexBufferRef *vRef, uint32_t blendMode, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID) - : plMetalRenderSpanPipelineState(device, vRef), - fVertexShaderID(vertexShaderID), - fFragmentShaderID(fragmentShaderID), - fBlendMode(blendMode) { - - }; - - virtual plMetalPipelineState* Clone() override { + plMetalDynamicMaterialPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, uint32_t blendMode, plShaderID::ID vertexShaderID, plShaderID::ID fragmentShaderID) + : plMetalRenderSpanPipelineState(device, vRef), + fVertexShaderID(vertexShaderID), + fFragmentShaderID(fragmentShaderID), + fBlendMode(blendMode){ + + }; + + virtual plMetalPipelineState* Clone() override + { return new plMetalDynamicMaterialPipelineState(*this); } - - bool IsEqual(const plMetalPipelineState &p) const override { + + bool IsEqual(const plMetalPipelineState& p) const override + { const plMetalDynamicMaterialPipelineState* dynamicState = static_cast(&p); if (!dynamicState) { return false; } - return plMetalRenderSpanPipelineState::IsEqual(p) && dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID && dynamicState->fBlendMode == fBlendMode; + return 
plMetalRenderSpanPipelineState::IsEqual(p) && dynamicState->fFragmentShaderID == fFragmentShaderID && dynamicState->fVertexShaderID == fVertexShaderID && dynamicState->fBlendMode == fBlendMode; } - - size_t GetHash() const override { + + size_t GetHash() const override + { std::size_t value = std::hash()(fFragmentShaderID); value ^= std::hash()(fVertexShaderID); value ^= std::hash()(fVertexShaderID); value ^= std::hash()(fBlendMode); - + return value ^ plMetalRenderSpanPipelineState::GetHash(); } - - const MTL::Function* GetVertexFunction(MTL::Library *library) override; - const MTL::Function* GetFragmentFunction(MTL::Library *library) override; - - const NS::String *GetDescription() override { + + const MTL::Function* GetVertexFunction(MTL::Library* library) override; + const MTL::Function* GetFragmentFunction(MTL::Library* library) override; + + const NS::String* GetDescription() override + { return NS::MakeConstantString("Dynamic Shader"); } - - void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override { + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override + { ConfigureBlendMode(fBlendMode, descriptor); } + protected: - plShaderID::ID fVertexShaderID; - plShaderID::ID fFragmentShaderID; - uint32_t fBlendMode; + plShaderID::ID fVertexShaderID; + plShaderID::ID fFragmentShaderID; + uint32_t fBlendMode; }; -template<> +template <> struct std::hash { std::size_t operator()(plMetalPipelineState const& s) const noexcept @@ -290,53 +319,60 @@ struct std::hash } }; -class plMetalClearPipelineState: public plMetalPipelineState { +class plMetalClearPipelineState : public plMetalPipelineState +{ public: - plMetalClearPipelineState(plMetalDevice *device, bool shouldClearColor, bool shouldClearDepth): - plMetalPipelineState(device) + plMetalClearPipelineState(plMetalDevice* device, bool shouldClearColor, bool shouldClearDepth) : plMetalPipelineState(device) { fShouldClearDepth = shouldClearDepth; 
fShouldClearColor = shouldClearColor; } - - virtual bool IsEqual(const plMetalPipelineState &p) const override { + + virtual bool IsEqual(const plMetalPipelineState& p) const override + { const plMetalClearPipelineState* clearState = static_cast(&p); if (!clearState) { return false; } return clearState->fShouldClearDepth == fShouldClearDepth && fShouldClearColor == clearState->fShouldClearColor; }; - - virtual uint16_t GetID() const override { return 4; }; - virtual plMetalPipelineState* Clone() override { + + virtual uint16_t GetID() const override { return 4; }; + virtual plMetalPipelineState* Clone() override + { return new plMetalClearPipelineState(*this); }; - + // - virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override { + virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override + { return library->newFunction(NS::MakeConstantString("clearVertex")); }; - virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override { + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override + { return library->newFunction(NS::MakeConstantString("clearFragment"), MakeFunctionConstants(), - (NS::Error **)NULL - )->autorelease(); + (NS::Error**)NULL) + ->autorelease(); }; - virtual const NS::String* GetDescription() override { + virtual const NS::String* GetDescription() override + { return NS::MakeConstantString("Clear"); }; - - virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override { - //if (fShouldClearColor) { - descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); + + virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override + { + // if (fShouldClearColor) { + descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); + descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); //} else { // 
descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); // descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); //} }; - - virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override { + + virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override + { vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(0)->setOffset(0); vertexDescriptor->attributes()->object(0)->setBufferIndex(0); @@ -344,25 +380,25 @@ class plMetalClearPipelineState: public plMetalPipelineState { vertexDescriptor->layouts()->object(0)->setStepFunction(MTL::VertexStepFunctionPerVertex); vertexDescriptor->layouts()->object(0)->setStepRate(1); }; - - virtual void GetFunctionConstants(MTL::FunctionConstantValues* values) const override { + + virtual void GetFunctionConstants(MTL::FunctionConstantValues* values) const override + { values->setConstantValue(&fShouldClearDepth, MTL::DataTypeBool, NS::UInteger(0)); values->setConstantValue(&fShouldClearColor, MTL::DataTypeBool, NS::UInteger(1)); } - - virtual size_t GetHash() const override { + + virtual size_t GetHash() const override + { std::size_t value = plMetalPipelineState::GetHash(); value ^= std::hash()(fShouldClearColor); value ^= std::hash()(fShouldClearDepth); - + return value; } - + private: - bool fShouldClearColor; bool fShouldClearDepth; - }; #endif /* plMetalPipelineState_hpp */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 16cc251dd7..186ccae2e4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -41,15 +41,16 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com *==LICENSE==*/ #include "plMetalPlateManager.h" -#include "plMetalPipeline.h" + #include + #include "ShaderTypes.h" +#include "plMetalPipeline.h" -plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) +plMetalPlateManager::plMetalPlateManager(plMetalPipeline *pipe) : plPlateManager(pipe), - fVtxBuffer(0) + fVtxBuffer(0) { - MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); depthDescriptor->setDepthWriteEnabled(false); @@ -60,7 +61,7 @@ plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) void plMetalPlateManager::ICreateGeometry() { plMetalPipeline *pipeline = (plMetalPipeline *)fOwner; - if(!fVtxBuffer) { + if (!fVtxBuffer) { struct plateVertexBuffer vertexBuffer; vertexBuffer.vertices[0].Set(-0.5f, -0.5f); vertexBuffer.uv[0].Set(0.0f, 0.0f); @@ -75,33 +76,34 @@ void plMetalPlateManager::ICreateGeometry() vertexBuffer.uv[3].Set(1.0f, 1.0f); uint16_t indices[6] = {0, 1, 2, 1, 2, 3}; - + fVtxBuffer = pipeline->fDevice.fMetalDevice->newBuffer(&vertexBuffer, sizeof(plateVertexBuffer), MTL::StorageModeManaged); fVtxBuffer->retain(); idxBuffer = pipeline->fDevice.fMetalDevice->newBuffer(&indices, sizeof(uint16_t) * 6, MTL::StorageModeManaged); } } -void plMetalPlateManager::EncodeDraw(MTL::RenderCommandEncoder *encoder) { +void plMetalPlateManager::EncodeDraw(MTL::RenderCommandEncoder *encoder) +{ encoder->setVertexBuffer(fVtxBuffer, 0, VertexAttributePosition); encoder->setVertexBuffer(fVtxBuffer, offsetof(plateVertexBuffer, uv), VertexAttributeTexcoord); - + encoder->drawIndexedPrimitives(MTL::PrimitiveTypeTriangle, 6, MTL::IndexTypeUInt16, idxBuffer, 0); } void plMetalPlateManager::IReleaseGeometry() { - if (fVtxBuffer) - { + if (fVtxBuffer) { fVtxBuffer->release(); fVtxBuffer = nullptr; } } -void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) { +void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) +{ 
plMetalPipeline *pipeline = (plMetalPipeline *)pipe; - plPlate* plate = nullptr; - + plPlate *plate = nullptr; + for (plate = fPlates; plate != nullptr; plate = plate->GetNext()) { if (plate->IsVisible()) { pipeline->IDrawPlate(plate); @@ -114,46 +116,51 @@ plMetalPlateManager::~plMetalPlateManager() IReleaseGeometry(); } - - -bool plMetalPlatePipelineState::IsEqual(const plMetalPipelineState &p) const { +bool plMetalPlatePipelineState::IsEqual(const plMetalPipelineState &p) const +{ return true; } -plMetalPipelineState *plMetalPlatePipelineState::Clone() { +plMetalPipelineState *plMetalPlatePipelineState::Clone() +{ return new plMetalPlatePipelineState(fDevice); } -const MTL::Function *plMetalPlatePipelineState::GetVertexFunction(MTL::Library *library) { +const MTL::Function *plMetalPlatePipelineState::GetVertexFunction(MTL::Library *library) +{ return library->newFunction(NS::MakeConstantString("plateVertexShader")); } -const MTL::Function *plMetalPlatePipelineState::GetFragmentFunction(MTL::Library *library) { +const MTL::Function *plMetalPlatePipelineState::GetFragmentFunction(MTL::Library *library) +{ return library->newFunction(NS::MakeConstantString("fragmentShader")); } -const NS::String *plMetalPlatePipelineState::GetDescription() { +const NS::String *plMetalPlatePipelineState::GetDescription() +{ return NS::MakeConstantString("Plate Pipeline State"); } -void plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { +void plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +{ descriptor->setBlendingEnabled(true); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); } -void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) { +void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) +{ 
vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(0)->setBufferIndex(VertexAttributePosition); vertexDescriptor->attributes()->object(0)->setOffset(0); vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(1)->setBufferIndex(VertexAttributeTexcoord); vertexDescriptor->attributes()->object(1)->setOffset(0); - + vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 2); vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 2); } -void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const { - +void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const +{ } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index dc74468cee..feaa5b1524 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -43,12 +43,14 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #ifndef plMetalPlateManager_hpp #define plMetalPlateManager_hpp +#include #include -#include "plPipeline/plPlates.h" + #include -#include + #include "hsPoint2.h" #include "plMetalPipelineState.h" +#include "plPipeline/plPlates.h" class plMetalPipeline; class plMetalDevice; @@ -56,39 +58,41 @@ class plMetalDevice; class plMetalPlatePipelineState : public plMetalPipelineState { public: - plMetalPlatePipelineState(plMetalDevice* device): plMetalPipelineState(device) { }; - virtual bool IsEqual(const plMetalPipelineState &p) const override; - virtual uint16_t GetID() const override { return 5; }; - virtual plMetalPipelineState* Clone() override; - virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override; - virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override; - virtual const NS::String* GetDescription() override; - + plMetalPlatePipelineState(plMetalDevice *device) : plMetalPipelineState(device){}; + virtual bool IsEqual(const plMetalPipelineState &p) const override; + virtual uint16_t GetID() const override { return 5; }; + virtual plMetalPipelineState *Clone() override; + virtual const MTL::Function *GetVertexFunction(MTL::Library *library) override; + virtual const MTL::Function *GetFragmentFunction(MTL::Library *library) override; + virtual const NS::String *GetDescription() override; + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; - + void ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) override; - + void GetFunctionConstants(MTL::FunctionConstantValues *) const override; - }; class plMetalPlateManager : public plPlateManager { friend class plMetalPipeline; + public: - plMetalPlateManager(plMetalPipeline* pipe); + plMetalPlateManager(plMetalPipeline *pipe); void IDrawToDevice(plPipeline *pipe) override; void ICreateGeometry(); void IReleaseGeometry(); void EncodeDraw(MTL::RenderCommandEncoder *encoder); ~plMetalPlateManager(); + 
private: - struct plateVertexBuffer { + struct plateVertexBuffer + { hsPoint2 vertices[4]; hsPoint2 uv[4]; }; - MTL::Buffer *fVtxBuffer; - MTL::Buffer *idxBuffer; + MTL::Buffer *fVtxBuffer; + MTL::Buffer *idxBuffer; MTL::DepthStencilState *fDepthState; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp index c0664feb24..3e0b225e01 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp @@ -39,17 +39,15 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -#include "HeadSpin.h" - #include "plMetalShader.h" -#include "plSurface/plShader.h" - +#include "HeadSpin.h" #include "plMetalPipeline.h" +#include "plSurface/plShader.h" plMetalShader::plMetalShader(plShader* owner) -: fOwner(owner), - fPipe(nil) + : fOwner(owner), + fPipe(nil) { owner->SetDeviceRef(this); } @@ -58,13 +56,12 @@ plMetalShader::~plMetalShader() { fPipe = nil; - //ISetError(nil); + // ISetError(nil); } void plMetalShader::SetOwner(plShader* owner) { - if( owner != fOwner ) - { + if (owner != fOwner) { Release(); fOwner = owner; owner->SetDeviceRef(this); @@ -81,4 +78,3 @@ void plMetalShader::SetOwner(plShader* owner) return hr; }*/ - diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h index 6706b095e2..8c5c6a1952 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h @@ -43,9 +43,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #ifndef plDXShader_inc #define plDXShader_inc -#include "plMetalDeviceRef.h" -#include #include +#include + +#include "plMetalDeviceRef.h" class plShader; class plMetalPipeline; @@ -53,24 +54,24 @@ class plMetalPipeline; class plMetalShader : public plMetalDeviceRef { protected: - plShader* fOwner; - //ST::string fErrorString; - plMetalPipeline* fPipe; - MTL::Function* fFunction; + plShader* fOwner; + // ST::string fErrorString; + plMetalPipeline* fPipe; + MTL::Function* fFunction; - //HRESULT IOnError(HRESULT hr, const char* errStr); - //void ISetError(const char* errStr) { fErrorString = errStr; } + // HRESULT IOnError(HRESULT hr, const char* errStr); + // void ISetError(const char* errStr) { fErrorString = errStr; } - //virtual HRESULT ICreate(plDXPipeline* pipe) = 0; - virtual bool ISetConstants(plMetalPipeline* pipe) = 0; // On error, sets error string. + // virtual HRESULT ICreate(plDXPipeline* pipe) = 0; + virtual bool ISetConstants(plMetalPipeline* pipe) = 0; // On error, sets error string. public: plMetalShader(plShader* owner); virtual ~plMetalShader(); - //ST::string GetErrorString() const { return fErrorString; } - void SetOwner(plShader* owner); - MTL::Function* GetShader(plMetalPipeline* pipe) { return fFunction; }; + // ST::string GetErrorString() const { return fErrorString; } + void SetOwner(plShader* owner); + MTL::Function* GetShader(plMetalPipeline* pipe) { return fFunction; }; }; #endif // plDXShader_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp index 7c899419ba..1eff7582ab 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp @@ -54,23 +54,22 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "HeadSpin.h" #include "hsWindows.h" - #include "plMetalPipeline.h" #include "plPipeline/hsWinRef.h" - // Following number needs to be at least: 64 chars max in plTextFont drawn at any one time // * 4 primitives per char max (for bold text) // * 3 verts per primitive -//const uint32_t kNumVertsInBuffer(32768); -const uint32_t kNumVertsInBuffer(4608); +// const uint32_t kNumVertsInBuffer(32768); +const uint32_t kNumVertsInBuffer(4608); -uint32_t plMetalTextFont::fBufferCursor = 0; +uint32_t plMetalTextFont::fBufferCursor = 0; //// Constructor & Destructor ///////////////////////////////////////////////// -plMetalTextFont::plMetalTextFont( plPipeline *pipe, plMetalDevice* device ) : plTextFont( pipe ), fTexture() +plMetalTextFont::plMetalTextFont(plPipeline *pipe, plMetalDevice *device) : plTextFont(pipe), + fTexture() { fDevice = device; fPipeline = (plMetalPipeline *)pipe; @@ -84,41 +83,43 @@ plMetalTextFont::~plMetalTextFont() //// ICreateTexture /////////////////////////////////////////////////////////// -void plMetalTextFont::ICreateTexture( uint16_t *data ) +void plMetalTextFont::ICreateTexture(uint16_t *data) { printf("Create texture\n"); - + MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Unorm, fTextureWidth, fTextureHeight, false); - + fTexture->release(); fTexture = fDevice->fMetalDevice->newTexture(descriptor); fTexture->setLabel(NS::MakeConstantString("Font texture")); - - struct InDataValues { - uint8_t a: 4; - uint8_t r: 4; - uint8_t g: 4; - uint8_t b: 4; + + struct InDataValues + { + uint8_t a : 4; + uint8_t r : 4; + uint8_t g : 4; + uint8_t b : 4; }; - - struct OutDataValues { + + struct OutDataValues + { uint8_t r; uint8_t g; uint8_t b; uint8_t a; }; - + uint32_t *outData = new uint32_t[fTextureWidth * fTextureHeight]; - for(int i = 0; i < fTextureWidth * fTextureHeight; i++) { - InDataValues *in = (InDataValues *)(data + i); + for (int i = 0; i < fTextureWidth * 
fTextureHeight; i++) { + InDataValues *in = (InDataValues *)(data + i); OutDataValues *out = (OutDataValues *)(outData + i); - + out->r = in->r * 255; out->b = in->b * 255; out->g = in->g * 255; out->a = in->a * 255; } - + fTexture->replaceRegion(MTL::Region(0, 0, fTextureWidth, fTextureHeight), 0, outData, 4 * fTextureWidth); delete[] outData; /* @@ -126,7 +127,7 @@ void plMetalTextFont::ICreateTexture( uint16_t *data ) D3DLOCKED_RECT lockInfo; D3DCAPS9 d3dCaps; - + // Check to make sure we can support it fDevice->GetDeviceCaps( &d3dCaps ); hsAssert( fTextureWidth <= d3dCaps.MaxTextureWidth, "Cannot initialize DX font--texture size too big" ); @@ -142,19 +143,19 @@ void plMetalTextFont::ICreateTexture( uint16_t *data ) */ } -void plMetalTextFont::CreateShared(plMetalDevice* device) +void plMetalTextFont::CreateShared(plMetalDevice *device) { } -void plMetalTextFont::ReleaseShared(MTL::Device* device) +void plMetalTextFont::ReleaseShared(MTL::Device *device) { } //// IInitStateBlocks ///////////////////////////////////////////////////////// -void plMetalTextFont::IInitStateBlocks() +void plMetalTextFont::IInitStateBlocks() { -/* + /* for( int i = 0; i < 2; i++ ) { fDevice->BeginStateBlock(); @@ -203,34 +204,34 @@ void plMetalTextFont::IInitStateBlocks() //// DestroyObjects /////////////////////////////////////////////////////////// -void plMetalTextFont::DestroyObjects() +void plMetalTextFont::DestroyObjects() { fInitialized = false; } //// IDrawPrimitive /////////////////////////////////////////////////////////// -void plMetalTextFont::IDrawPrimitive( uint32_t count, plFontVertex *array ) +void plMetalTextFont::IDrawPrimitive(uint32_t count, plFontVertex *array) { - plFontVertex *v; - - plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); - + plFontVertex *v; + + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); + 
fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); - const uint maxCount = 4096/(sizeof(plFontVertex) * 3); - uint drawm = 0; - while(count > 0) { + const uint maxCount = 4096 / (sizeof(plFontVertex) * 3); + uint drawm = 0; + while (count > 0) { uint drawCount = MIN(maxCount, count); - fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawm * 3), drawCount * 3 * sizeof( plFontVertex ), 0); - + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawm * 3), drawCount * 3 * sizeof(plFontVertex), 0); + fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), drawCount * 3); - + count -= drawCount; drawm += drawCount; } - //if( !fBuffer ) - // return; + // if( !fBuffer ) + // return; /// Lock the buffer and write to it /*if( fBufferCursor && (fBufferCursor + count * 3 < kNumVertsInBuffer) ) @@ -270,21 +271,21 @@ void plMetalTextFont::IDrawPrimitive( uint32_t count, plFontVertex *array ) //// IDrawLines /////////////////////////////////////////////////////////////// -void plMetalTextFont::IDrawLines( uint32_t count, plFontVertex *array ) +void plMetalTextFont::IDrawLines(uint32_t count, plFontVertex *array) { - plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); - + plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); - fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array, count * 2 * sizeof( plFontVertex ), 0); - + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array, count * 2 * sizeof(plFontVertex), 0); + matrix_float4x4 mat = matrix_identity_float4x4; mat.columns[0][0] = 2.0f / (float)fPipe->Width(); mat.columns[1][1] = -2.0f / 
(float)fPipe->Height(); mat.columns[3][0] = -1.0; mat.columns[3][1] = 1.0; - fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof( matrix_float4x4 ), 1); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), 1); fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0); - + fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeLine, NS::UInteger(0), count * 2); /*if( !fBuffer ) return; @@ -301,7 +302,7 @@ void plMetalTextFont::IDrawLines( uint32_t count, plFontVertex *array ) //// FlushDraws /////////////////////////////////////////////////////////////// // Flushes out and finishes any drawing left to be done. -void plMetalTextFont::FlushDraws() +void plMetalTextFont::FlushDraws() { /*if( !fBuffer ) return; @@ -318,15 +319,14 @@ void plMetalTextFont::FlushDraws() //// SaveStates /////////////////////////////////////////////////////////////// -void plMetalTextFont::SaveStates() +void plMetalTextFont::SaveStates() { - matrix_float4x4 mat = matrix_identity_float4x4; mat.columns[0][0] = 2.0f / (float)fPipe->Width(); mat.columns[1][1] = -2.0f / (float)fPipe->Height(); mat.columns[3][0] = -1.0; mat.columns[3][1] = 1.0; - fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof( matrix_float4x4 ), 1); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), 1); fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0); /*if( !fInitialized ) IInitObjects(); @@ -353,47 +353,52 @@ void plMetalTextFont::SaveStates() //// RestoreStates //////////////////////////////////////////////////////////// -void plMetalTextFont::RestoreStates() +void plMetalTextFont::RestoreStates() { /*if (fOldStateBlock) fOldStateBlock->Apply(); - + fDevice->SetTexture(0, nullptr); fDevice->SetTransform( D3DTS_TEXTURE0, &d3dIdentityMatrix );*/ } - - -bool plMetalTextFontPipelineState::IsEqual(const 
plMetalPipelineState &p) const { +bool plMetalTextFontPipelineState::IsEqual(const plMetalPipelineState &p) const +{ return true; } -plMetalPipelineState *plMetalTextFontPipelineState::Clone() { +plMetalPipelineState *plMetalTextFontPipelineState::Clone() +{ return new plMetalTextFontPipelineState(fDevice); } -const MTL::Function *plMetalTextFontPipelineState::GetVertexFunction(MTL::Library *library) { +const MTL::Function *plMetalTextFontPipelineState::GetVertexFunction(MTL::Library *library) +{ return library->newFunction(NS::MakeConstantString("textFontVertexShader")); } -const MTL::Function *plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library *library) { +const MTL::Function *plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library *library) +{ return library->newFunction(NS::MakeConstantString("textFontFragmentShader")); } -const NS::String *plMetalTextFontPipelineState::GetDescription() { +const NS::String *plMetalTextFontPipelineState::GetDescription() +{ return NS::MakeConstantString("Font Rendering"); } -void plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) { - +void plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +{ descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); } -void plMetalTextFontPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) { +void plMetalTextFontPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) +{ return; } -void plMetalTextFontPipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const { +void plMetalTextFontPipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const +{ return; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h index 
35fa5dbcf8..514bffe231 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -42,11 +42,11 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #ifndef _plDXTextFont_h #define _plDXTextFont_h -#include "plPipeline/plTextFont.h" -#include "plMetalPipeline.h" -#include "plMetalPipelineState.h" #include +#include "plMetalPipeline.h" +#include "plMetalPipelineState.h" +#include "plPipeline/plTextFont.h" //// plDXTextFont Class Definition /////////////////////////////////////////// @@ -56,50 +56,47 @@ class plMetalDevice; class plMetalTextFontPipelineState : public plMetalPipelineState { public: - plMetalTextFontPipelineState(plMetalDevice* device): plMetalPipelineState(device) { }; - virtual bool IsEqual(const plMetalPipelineState &p) const override; - virtual uint16_t GetID() const override { return 6; }; + plMetalTextFontPipelineState(plMetalDevice* device) : plMetalPipelineState(device){}; + virtual bool IsEqual(const plMetalPipelineState& p) const override; + virtual uint16_t GetID() const override { return 6; }; virtual plMetalPipelineState* Clone() override; virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override; virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override; virtual const NS::String* GetDescription() override; - - void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; - - void ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) override; - - void GetFunctionConstants(MTL::FunctionConstantValues *) const override; - + + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; + + void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override; + + void GetFunctionConstants(MTL::FunctionConstantValues*) const override; }; class plMetalTextFont : public plTextFont { protected: - static uint32_t fBufferCursor; - - void 
ICreateTexture(uint16_t *data) override; - void IInitStateBlocks() override; - void IDrawPrimitive(uint32_t count, plFontVertex *array) override; - void IDrawLines(uint32_t count, plFontVertex *array) override; - - MTL::Texture* fTexture; - plMetalDevice* fDevice; - + static uint32_t fBufferCursor; + + void ICreateTexture(uint16_t* data) override; + void IInitStateBlocks() override; + void IDrawPrimitive(uint32_t count, plFontVertex* array) override; + void IDrawLines(uint32_t count, plFontVertex* array) override; + + MTL::Texture* fTexture; + plMetalDevice* fDevice; + plMetalPipeline* fPipeline; public: - plMetalTextFont( plPipeline *pipe, plMetalDevice *device ); + plMetalTextFont(plPipeline* pipe, plMetalDevice* device); ~plMetalTextFont(); - static void CreateShared(plMetalDevice* device); - static void ReleaseShared(MTL::Device* device); + static void CreateShared(plMetalDevice* device); + static void ReleaseShared(MTL::Device* device); - void FlushDraws() override; - void SaveStates() override; - void RestoreStates() override; - void DestroyObjects() override; + void FlushDraws() override; + void SaveStates() override; + void RestoreStates() override; + void DestroyObjects() override; }; - #endif // _plDXTextFont_h - diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp index 0f9a94c507..ebe2aa0a8d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp @@ -39,20 +39,18 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -#include "HeadSpin.h" -#include "hsWindows.h" - -#include - #include "plMetalVertexShader.h" -#include "plSurface/plShader.h" +#include +#include "HeadSpin.h" +#include "hsWindows.h" #include "plDrawable/plGBufferGroup.h" #include "plMetalPipeline.h" +#include "plSurface/plShader.h" plMetalVertexShader::plMetalVertexShader(plShader* owner) -: plMetalShader(owner) + : plMetalShader(owner) { } @@ -65,17 +63,15 @@ void plMetalVertexShader::Release() { fPipe = nil; - //ISetError(nil); + // ISetError(nil); } bool plMetalVertexShader::ISetConstants(plMetalPipeline* pipe) { - if( fOwner->GetNumConsts() ) - { - float *ptr = (float *)fOwner->GetConstBasePtr(); - pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); + if (fOwner->GetNumConsts()) { + float* ptr = (float*)fOwner->GetConstBasePtr(); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); } return true; } - diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h index 3a096d097a..6ae2329107 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h @@ -51,16 +51,14 @@ class plMetalPipeline; class plMetalVertexShader : public plMetalShader { protected: - - public: - virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. + virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. 
plMetalVertexShader(plShader* owner); virtual ~plMetalVertexShader(); - virtual void Release(); - void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalVertexShader* GetNext() { return (plMetalVertexShader*)fNext; } + virtual void Release(); + void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + plMetalVertexShader* GetNext() { return (plMetalVertexShader*)fNext; } }; #endif // plMetalVertexShader_inc From a0277c6e55cdf2fbcb251cbff0dbd824359653e7 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 18 Oct 2023 19:29:48 -0700 Subject: [PATCH 141/165] Adding comment for Metal UBO encoding Or rather the lack of UBO encoding. --- .../pfMetalPipeline/plMetalMaterialShaderRef.cpp | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 44d9135ad1..48ff1ee224 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -120,6 +120,19 @@ void plMetalMaterialShaderRef::CheckMateralRef() // fast encode doesn't support piggybacks or push over layers, but it does use preloaded data on the GPU so it's much faster. Use this encoder if there are no piggybacks or pushover layers void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass) { + /* + NOTE: + This code is all that remains of the UBO path - which has slowly been cut down + by piggybacks interfering with UBOs, and a lot of uniforms moving into precompiled + sections of the shaders. + + plMetalFragmentShaderArgumentBuffer literally just has one float left - which could + be factored out. 
The only reason this code hasn't been deleted is because plates + still relies on it - but plates also needs to be updated anyway. + + UBOs in theory are more efficient. So we either need to figure out how to do UBOs + or finally delete this code for good. + */ for (uint32_t i = GetPassIndex(pass); i < GetPassIndex(pass) + fPassLengths[pass]; i++) { plLayerInterface* layer = fMaterial->GetLayer(i); From f45c306e27d8b69f42ab0d50867c70cc4d4b3ae0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 18 Oct 2023 19:30:59 -0700 Subject: [PATCH 142/165] Fixing header guards --- .../FeatureLib/pfMetalPipeline/plMetalPipelineState.h | 6 +++--- Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 21d64fccb3..85ce9317e7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -40,8 +40,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com *==LICENSE==*/ -#ifndef plMetalPipelineState_hpp -#define plMetalPipelineState_hpp +#ifndef plMetalPipelineState_h +#define plMetalPipelineState_h #include @@ -401,4 +401,4 @@ class plMetalClearPipelineState : public plMetalPipelineState bool fShouldClearDepth; }; -#endif /* plMetalPipelineState_hpp */ +#endif /* plMetalPipelineState_h */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h index 514bffe231..6fdc708e34 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -39,8 +39,8 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -#ifndef _plDXTextFont_h -#define _plDXTextFont_h +#ifndef _plMetalTextFont_h +#define _plMetalTextFont_h #include @@ -99,4 +99,4 @@ class plMetalTextFont : public plTextFont void DestroyObjects() override; }; -#endif // _plDXTextFont_h +#endif // _plMetalTextFont_h From 448fbc62a1ae3149d5796cf3db62f8e25ddb11e7 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 19 Oct 2023 21:52:00 -0700 Subject: [PATCH 143/165] Documeting/cleaning up pipeline state header --- .../pfMetalPipeline/plMetalPipelineState.cpp | 19 ++--------------- .../pfMetalPipeline/plMetalPipelineState.h | 21 +++++++++++++------ 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index b0d79bf911..834c7f713e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -178,20 +178,16 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode case hsGMatState::kBlendAlpha: if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { - // printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); } else { - // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); ; } descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); } else { if (blendMode & hsGMatState::kBlendAlphaPremultiplied) { - // printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); } else { - // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); } 
descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); @@ -201,13 +197,11 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Multiply the final color onto the frame buffer. case hsGMatState::kBlendMult: if (blendMode & hsGMatState::kBlendInvertFinalColor) { - // printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceColor); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOneMinusSourceColor); } else { - // printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorZero); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceColor); @@ -217,7 +211,6 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Add final color to FB. case hsGMatState::kBlendAdd: - // printf("glBlendFunc(GL_ONE, GL_ONE);\n"); descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); @@ -225,7 +218,6 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Multiply final color by FB color and add it into the FB. case hsGMatState::kBlendMADD: - // printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorDestinationColor); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); break; @@ -233,13 +225,11 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Final color times final alpha, added into the FB. 
case hsGMatState::kBlendAddColorTimesAlpha: if (blendMode & hsGMatState::kBlendInvertFinalAlpha) { - // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorOne); } else { - // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); @@ -249,19 +239,12 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode // Overwrite final color onto FB case 0: - // printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setRgbBlendOperation(MTL::BlendOperationAdd); descriptor->setAlphaBlendOperation(MTL::BlendOperationAdd); - // printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); descriptor->setSourceAlphaBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationAlphaBlendFactor(MTL::BlendFactorZero); - - /*descriptor->colorAttachments()->object(0)->setSourceRGBBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setSourceAlphaBlendFactor(MTL::BlendFactorOne); - descriptor->colorAttachments()->object(0)->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - descriptor->colorAttachments()->object(0)->setDestinationAlphaBlendFactor(MTL::BlendFactorZero);*/ break; default: { @@ -400,6 +383,7 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetVertexFunction(MTL: { MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); MTL::Function* vertFunction; + // map the original engine vertex shader id to the pixel shader function switch 
(fVertexShaderID) { case plShaderID::vs_WaveFixedFin7: vertFunction = library->newFunction( @@ -453,6 +437,7 @@ const MTL::Function* plMetalDynamicMaterialPipelineState::GetFragmentFunction(MT { MTL::FunctionConstantValues* functionConstants = MakeFunctionConstants(); MTL::Function* fragFunction; + // map the original engine pixel shader id to the pixel shader function switch (fFragmentShaderID) { case plShaderID::ps_WaveFixed: fragFunction = library->newFunction( diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 85ce9317e7..e6a87e32ce 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -50,6 +50,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "plMetalDevice.h" #include "plSurface/plShaderTable.h" +//MARK: Base pipeline state + class plMetalPipelineState { public: @@ -89,6 +91,8 @@ class plMetalPipelineState } }; +//MARK: Abstract FVF vertex shader program parent type + class plMetalRenderSpanPipelineState : public plMetalPipelineState { public: @@ -121,6 +125,8 @@ class plMetalRenderSpanPipelineState : public plMetalPipelineState } }; +//MARK: Fixed function emulating material program + struct plMetalFragmentShaderDescription { uint8_t passTypes[8]; @@ -205,6 +211,8 @@ class plMetalMaterialPassPipelineState : public plMetalRenderSpanPipelineState plMetalFragmentShaderDescription fFragmentShaderDescription; }; +//MARK: Shadow casting program + class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineState { public: @@ -233,6 +241,8 @@ class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineS } }; +//MARK: Shadow rendering program + class plMetalRenderShadowPipelineState : public plMetalMaterialPassPipelineState { public: @@ -256,6 +266,8 @@ class plMetalRenderShadowPipelineState : public 
plMetalMaterialPassPipelineState } }; +//MARK: Shader based render programs + class plMetalDynamicMaterialPipelineState : public plMetalRenderSpanPipelineState { public: @@ -319,6 +331,8 @@ struct std::hash } }; +//MARK: Clear buffer program + class plMetalClearPipelineState : public plMetalPipelineState { public: @@ -343,11 +357,11 @@ class plMetalClearPipelineState : public plMetalPipelineState return new plMetalClearPipelineState(*this); }; - // virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override { return library->newFunction(NS::MakeConstantString("clearVertex")); }; + virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override { return library->newFunction(NS::MakeConstantString("clearFragment"), @@ -362,13 +376,8 @@ class plMetalClearPipelineState : public plMetalPipelineState virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override { - // if (fShouldClearColor) { descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); - //} else { - // descriptor->setSourceRGBBlendFactor(MTL::BlendFactorZero); - // descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOne); - //} }; virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override From da3da39b5873a8e1bd328b715e86a1c9485dfe82 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 3 Nov 2023 20:37:04 -0700 Subject: [PATCH 144/165] Apply lint pass and adopting MTLSTR --- .../pfMetalPipeline/plMetalDevice.cpp | 9 +-- .../pfMetalPipeline/plMetalDeviceRef.h | 10 +-- .../pfMetalPipeline/plMetalFragmentShader.h | 4 +- .../plMetalMaterialShaderRef.cpp | 4 +- .../plMetalMaterialShaderRef.h | 4 +- .../pfMetalPipeline/plMetalPipeline.cpp | 6 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 12 ++-- .../pfMetalPipeline/plMetalPipelineState.h | 66 +++++++++---------- .../pfMetalPipeline/plMetalPlateManager.cpp | 8 +-- 
.../pfMetalPipeline/plMetalPlateManager.h | 12 ++-- .../pfMetalPipeline/plMetalTextFont.cpp | 8 +-- .../pfMetalPipeline/plMetalTextFont.h | 12 ++-- .../pfMetalPipeline/plMetalVertexShader.h | 9 +-- 13 files changed, 83 insertions(+), 81 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 0e99c550f0..1c8371d743 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -219,7 +219,7 @@ void plMetalDevice::BeginNewRenderPass() // lazilly create the screen render encoder if it does not yet exist if (!fCurrentOffscreenCommandBuffer && !fCurrentRenderTargetCommandEncoder) { - SetRenderTarget(NULL); + SetRenderTarget(nullptr); } if (fCurrentRenderTargetCommandEncoder) { @@ -1000,7 +1000,8 @@ void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condi } if (fNewPipelineStateMap[record] != NULL) { - return fNewPipelineStateMap[record]; + // The shader is already compiled. 
+ return; } MTL::Library* library = fMetalDevice->newDefaultLibrary(); @@ -1181,8 +1182,8 @@ void plMetalDevice::CreateGammaAdjustState() MTL::RenderPipelineDescriptor* gammaDescriptor = MTL::RenderPipelineDescriptor::alloc()->init(); MTL::Library* library = fMetalDevice->newDefaultLibrary(); - gammaDescriptor->setVertexFunction(library->newFunction(NS::MakeConstantString("gammaCorrectVertex"))->autorelease()); - gammaDescriptor->setFragmentFunction(library->newFunction(NS::MakeConstantString("gammaCorrectFragment"))->autorelease()); + gammaDescriptor->setVertexFunction(library->newFunction(MTLSTR("gammaCorrectVertex"))->autorelease()); + gammaDescriptor->setFragmentFunction(library->newFunction(MTLSTR("gammaCorrectFragment"))->autorelease()); library->release(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index 5f4709b2a0..e30df0db03 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -140,7 +140,7 @@ class plMetalBufferPoolRef : public plMetalDeviceRef fBuffers[fCurrentFrame][fCurrentPass] = fBuffer; } - void Release() + void Release() override { for (int i = 0; i < 3; i++) { for (auto buffer : fBuffers[i]) { @@ -205,7 +205,7 @@ class plMetalVertexBufferRef : public plMetalBufferPoolRef void Link(plMetalVertexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalVertexBufferRef* GetNext() { return (plMetalVertexBufferRef*)fNext; } - void Release(); + void Release() override; }; class plMetalIndexBufferRef : public plMetalBufferPoolRef @@ -232,7 +232,7 @@ class plMetalIndexBufferRef : public plMetalBufferPoolRef bool Expired(uint32_t t) const { return Volatile() && (IsDirty() || (fRefTime != t)); } void SetRefTime(uint32_t t) { fRefTime = t; } - void Release(); + void Release() override; void Link(plMetalIndexBufferRef** back) { 
plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalIndexBufferRef* GetNext() { return (plMetalIndexBufferRef*)fNext; } @@ -269,7 +269,7 @@ class plMetalTextureRef : public plMetalDeviceRef virtual ~plMetalTextureRef(); - void Release(); + void Release() override; }; class plMetalRenderTargetRef : public plMetalTextureRef @@ -286,7 +286,7 @@ class plMetalRenderTargetRef : public plMetalTextureRef virtual ~plMetalRenderTargetRef(); - void Release(); + void Release() override; virtual void SetOwner(plRenderTarget* targ) { fOwner = (plBitmap*)targ; } }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h index d2ac66e265..427469a7ee 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h @@ -52,11 +52,11 @@ class plMetalFragmentShader : public plMetalShader { protected: public: - virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. + bool ISetConstants(plMetalPipeline* pipe) override; // On error, sets error string. 
plMetalFragmentShader(plShader* owner); virtual ~plMetalFragmentShader(); - virtual void Release(); + void Release() override; void Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalFragmentShader* GetNext() { return (plMetalFragmentShader*)fNext; } }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 48ff1ee224..ddf4568e31 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -272,7 +272,7 @@ void plMetalMaterialShaderRef::ILoopOverLayers() for (int layerOffset = 0; layerOffset < j - currLayer; layerOffset++) { plLayerInterface* layer = fMaterial->GetLayer(currLayer + layerOffset); layers[layerOffset] = layer; - IBuildLayerTexture(NULL, layerOffset, layer); + IBuildLayerTexture(nullptr, layerOffset, layer); } fPasses.push_back(layers); @@ -402,7 +402,7 @@ bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay) return true; } -uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription* passDescription, plMetalFragmentShaderArgumentBuffer* uniforms, std::vector* piggybacks, std::function preEncodeTransform, std::function postEncodeTransform) +uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription* passDescription, plMetalFragmentShaderArgumentBuffer* uniforms, std::vector* piggybacks, const std::function& preEncodeTransform, const std::function& postEncodeTransform) { if (!fMaterial || layer >= fMaterial->GetNumLayers() || !fMaterial->GetLayer(layer)) { return -1; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index d2e3a8fe88..e1830ba7a6 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -75,7 +75,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef plMetalMaterialShaderRef(hsGMaterial *mat, plMetalPipeline *pipe); ~plMetalMaterialShaderRef(); - void Release(); + void Release() override; void CheckMateralRef(); uint32_t GetNumPasses() const { return fNumPasses; } @@ -104,7 +104,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void ILoopOverLayers(); uint32_t fNumPasses; - uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, std::function preEncodeTransform, std::function postEncodeTransform); + uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, const std::function& preEncodeTransform, const std::function& postEncodeTransform); bool ICanEatLayer(plLayerInterface *lay); uint32_t ILayersAtOnce(uint32_t which); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 7afbec5536..050b1ef3a6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1243,7 +1243,7 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, // Projections that get applied to the frame buffer (after all passes). 
if (fProjAll.size() && !(fView.fRenderState & kRenderNoProjection)) { - fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(NS::MakeConstantString("Render All Projections")); + fDevice.CurrentRenderCommandEncoder()->pushDebugGroup(MTLSTR("Render All Projections")); IRenderProjections(render, vRef); fDevice.CurrentRenderCommandEncoder()->popDebugGroup(); } @@ -2760,8 +2760,8 @@ void plMetalPipeline::IPreprocessAvatarTextures() MTL::RenderPipelineDescriptor* descriptor = MTL::RenderPipelineDescriptor::alloc()->init()->autorelease(); MTL::Library* library = fDevice.fMetalDevice->newDefaultLibrary()->autorelease(); - MTL::Function* vertFunction = library->newFunction(NS::MakeConstantString("PreprocessAvatarVertexShader"))->autorelease(); - MTL::Function* fragFunction = library->newFunction(NS::MakeConstantString("PreprocessAvatarFragmentShader"))->autorelease(); + MTL::Function* vertFunction = library->newFunction(MTLSTR("PreprocessAvatarVertexShader"))->autorelease(); + MTL::Function* fragFunction = library->newFunction(MTLSTR("PreprocessAvatarFragmentShader"))->autorelease(); descriptor->setVertexFunction(vertFunction); descriptor->setFragmentFunction(fragFunction); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 834c7f713e..2f662b7a8c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -283,7 +283,7 @@ MTL::Function* plMetalMaterialPassPipelineState::GetFragmentFunction(MTL::Librar return library->newFunction( NS::String::string("pipelineFragmentShader", NS::ASCIIStringEncoding), MakeFunctionConstants(), - (NS::Error**)NULL) + (NS::Error**)nullptr) ->autorelease(); } @@ -293,7 +293,7 @@ plMetalMaterialPassPipelineState::~plMetalMaterialPassPipelineState() const NS::String* plMetalMaterialPassPipelineState::GetDescription() { - return 
NS::MakeConstantString("Material Pipeline"); + return MTLSTR("Material Pipeline"); } void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) @@ -336,7 +336,7 @@ MTL::Function* plMetalRenderShadowPipelineState::GetVertexFunction(MTL::Library* return library->newFunction( NS::String::string("shadowCastVertexShader", NS::ASCIIStringEncoding), MakeFunctionConstants(), - (NS::Error**)NULL) + (NS::Error**)nullptr) ->autorelease(); } @@ -345,7 +345,7 @@ MTL::Function* plMetalRenderShadowPipelineState::GetFragmentFunction(MTL::Librar return library->newFunction( NS::String::string("shadowCastFragmentShader", NS::ASCIIStringEncoding), MakeFunctionConstants(), - (NS::Error**)NULL) + (NS::Error**)nullptr) ->autorelease(); } @@ -361,7 +361,7 @@ const MTL::Function* plMetalRenderShadowCasterPipelineState::GetVertexFunction(M { NS::Error* error = nullptr; MTL::Function* function = library->newFunction( - NS::MakeConstantString("shadowVertexShader"), + MTLSTR("shadowVertexShader"), MakeFunctionConstants(), &error) ->autorelease(); @@ -372,7 +372,7 @@ const MTL::Function* plMetalRenderShadowCasterPipelineState::GetFragmentFunction { NS::Error* error = nullptr; MTL::Function* function = library->newFunction( - NS::MakeConstantString("shadowFragmentShader"), + MTLSTR("shadowFragmentShader"), MakeFunctionConstants(), &error) ->autorelease(); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index e6a87e32ce..00f83516a9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -97,7 +97,7 @@ class plMetalRenderSpanPipelineState : public plMetalPipelineState { public: plMetalRenderSpanPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef); - virtual bool IsEqual(const plMetalPipelineState& p) const + bool IsEqual(const 
plMetalPipelineState& p) const override { const plMetalRenderSpanPipelineState* renderSpanPipelineSate = static_cast(&p); if (!renderSpanPipelineSate) { @@ -105,10 +105,10 @@ class plMetalRenderSpanPipelineState : public plMetalPipelineState } return renderSpanPipelineSate->fNumUVs == fNumUVs && renderSpanPipelineSate->fNumWeights == fNumWeights && renderSpanPipelineSate->fHasSkinIndices == fHasSkinIndices; }; - virtual size_t GetHash() const; + size_t GetHash() const override; - virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; - virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor); + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override = 0; + void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override; void ConfigureBlendMode(const uint32_t blendMode, MTL::RenderPipelineColorAttachmentDescriptor* descriptor); @@ -116,7 +116,7 @@ class plMetalRenderSpanPipelineState : public plMetalPipelineState uint8_t fNumUVs; uint8_t fNumWeights; bool fHasSkinIndices; - virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const; + void GetFunctionConstants(MTL::FunctionConstantValues*) const override; MTL::FunctionConstantValues* MakeFunctionConstants() { MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); @@ -188,24 +188,24 @@ class plMetalMaterialPassPipelineState : public plMetalRenderSpanPipelineState { public: plMetalMaterialPassPipelineState(plMetalDevice* device, const plMetalVertexBufferRef* vRef, const plMetalFragmentShaderDescription& description); - virtual size_t GetHash() const override; + size_t GetHash() const override; MTL::Function* GetVertexFunction(MTL::Library* library) override; MTL::Function* GetFragmentFunction(MTL::Library* library) override; - virtual const NS::String* GetDescription() override; + const NS::String* GetDescription() override; void 
ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; - virtual bool IsEqual(const plMetalPipelineState& p) const override; + bool IsEqual(const plMetalPipelineState& p) const override; - virtual uint16_t GetID() const override { return 1; }; + uint16_t GetID() const override { return 1; }; - virtual plMetalPipelineState* Clone() override + plMetalPipelineState* Clone() override { return new plMetalMaterialPassPipelineState(*this); } ~plMetalMaterialPassPipelineState(); - virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const override; + void GetFunctionConstants(MTL::FunctionConstantValues*) const override; protected: plMetalFragmentShaderDescription fFragmentShaderDescription; @@ -225,7 +225,7 @@ class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineS const NS::String* GetDescription() override { - return NS::MakeConstantString("Shadow Caster Pipeline"); + return MTLSTR("Shadow Caster Pipeline"); }; void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override @@ -233,9 +233,9 @@ class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineS descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); }; - virtual uint16_t GetID() const override { return 2; }; + uint16_t GetID() const override { return 2; }; - virtual plMetalPipelineState* Clone() override + plMetalPipelineState* Clone() override { return new plMetalRenderShadowCasterPipelineState(*this); } @@ -253,14 +253,14 @@ class plMetalRenderShadowPipelineState : public plMetalMaterialPassPipelineState const NS::String* GetDescription() override { - return NS::MakeConstantString("Shadow Span Render Pipeline"); + return MTLSTR("Shadow Span Render Pipeline"); }; MTL::Function* GetVertexFunction(MTL::Library* library) override; MTL::Function* GetFragmentFunction(MTL::Library* library) override; void 
ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; - virtual uint16_t GetID() const override { return 3; }; + uint16_t GetID() const override { return 3; }; - virtual plMetalPipelineState* Clone() override + plMetalPipelineState* Clone() override { return new plMetalRenderShadowPipelineState(*this); } @@ -279,7 +279,7 @@ class plMetalDynamicMaterialPipelineState : public plMetalRenderSpanPipelineStat }; - virtual plMetalPipelineState* Clone() override + plMetalPipelineState* Clone() override { return new plMetalDynamicMaterialPipelineState(*this); } @@ -308,7 +308,7 @@ class plMetalDynamicMaterialPipelineState : public plMetalRenderSpanPipelineStat const NS::String* GetDescription() override { - return NS::MakeConstantString("Dynamic Shader"); + return MTLSTR("Dynamic Shader"); } void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override @@ -342,7 +342,7 @@ class plMetalClearPipelineState : public plMetalPipelineState fShouldClearColor = shouldClearColor; } - virtual bool IsEqual(const plMetalPipelineState& p) const override + bool IsEqual(const plMetalPipelineState& p) const override { const plMetalClearPipelineState* clearState = static_cast(&p); if (!clearState) { @@ -351,36 +351,36 @@ class plMetalClearPipelineState : public plMetalPipelineState return clearState->fShouldClearDepth == fShouldClearDepth && fShouldClearColor == clearState->fShouldClearColor; }; - virtual uint16_t GetID() const override { return 4; }; - virtual plMetalPipelineState* Clone() override + uint16_t GetID() const override { return 4; }; + plMetalPipelineState* Clone() override { return new plMetalClearPipelineState(*this); }; - virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override + const MTL::Function* GetVertexFunction(MTL::Library* library) override { - return library->newFunction(NS::MakeConstantString("clearVertex")); + return library->newFunction(MTLSTR("clearVertex")); }; - virtual const 
MTL::Function* GetFragmentFunction(MTL::Library* library) override + const MTL::Function* GetFragmentFunction(MTL::Library* library) override { - return library->newFunction(NS::MakeConstantString("clearFragment"), + return library->newFunction(MTLSTR("clearFragment"), MakeFunctionConstants(), - (NS::Error**)NULL) + (NS::Error**)nullptr) ->autorelease(); }; - virtual const NS::String* GetDescription() override + const NS::String* GetDescription() override { - return NS::MakeConstantString("Clear"); + return MTLSTR("Clear"); }; - virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override { descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorZero); }; - virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override + void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override { vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(0)->setOffset(0); @@ -390,13 +390,13 @@ class plMetalClearPipelineState : public plMetalPipelineState vertexDescriptor->layouts()->object(0)->setStepRate(1); }; - virtual void GetFunctionConstants(MTL::FunctionConstantValues* values) const override + void GetFunctionConstants(MTL::FunctionConstantValues* values) const override { values->setConstantValue(&fShouldClearDepth, MTL::DataTypeBool, NS::UInteger(0)); values->setConstantValue(&fShouldClearColor, MTL::DataTypeBool, NS::UInteger(1)); } - virtual size_t GetHash() const override + size_t GetHash() const override { std::size_t value = plMetalPipelineState::GetHash(); value ^= std::hash()(fShouldClearColor); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 186ccae2e4..23805c8288 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -49,7 +49,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com plMetalPlateManager::plMetalPlateManager(plMetalPipeline *pipe) : plPlateManager(pipe), - fVtxBuffer(0) + fVtxBuffer(nullptr) { MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); @@ -128,17 +128,17 @@ plMetalPipelineState *plMetalPlatePipelineState::Clone() const MTL::Function *plMetalPlatePipelineState::GetVertexFunction(MTL::Library *library) { - return library->newFunction(NS::MakeConstantString("plateVertexShader")); + return library->newFunction(MTLSTR("plateVertexShader")); } const MTL::Function *plMetalPlatePipelineState::GetFragmentFunction(MTL::Library *library) { - return library->newFunction(NS::MakeConstantString("fragmentShader")); + return library->newFunction(MTLSTR("fragmentShader")); } const NS::String *plMetalPlatePipelineState::GetDescription() { - return NS::MakeConstantString("Plate Pipeline State"); + return MTLSTR("Plate Pipeline State"); } void plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index feaa5b1524..60e8cfbc5b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -59,12 +59,12 @@ class plMetalPlatePipelineState : public plMetalPipelineState { public: plMetalPlatePipelineState(plMetalDevice *device) : plMetalPipelineState(device){}; - virtual bool IsEqual(const plMetalPipelineState &p) const override; - virtual uint16_t GetID() const override { return 5; }; - virtual plMetalPipelineState *Clone() override; - virtual const 
MTL::Function *GetVertexFunction(MTL::Library *library) override; - virtual const MTL::Function *GetFragmentFunction(MTL::Library *library) override; - virtual const NS::String *GetDescription() override; + bool IsEqual(const plMetalPipelineState &p) const override; + uint16_t GetID() const override { return 5; }; + plMetalPipelineState *Clone() override; + const MTL::Function *GetVertexFunction(MTL::Library *library) override; + const MTL::Function *GetFragmentFunction(MTL::Library *library) override; + const NS::String *GetDescription() override; void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp index 1eff7582ab..ac7a398fe6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp @@ -91,7 +91,7 @@ void plMetalTextFont::ICreateTexture(uint16_t *data) fTexture->release(); fTexture = fDevice->fMetalDevice->newTexture(descriptor); - fTexture->setLabel(NS::MakeConstantString("Font texture")); + fTexture->setLabel(MTLSTR("Font texture")); struct InDataValues { @@ -374,17 +374,17 @@ plMetalPipelineState *plMetalTextFontPipelineState::Clone() const MTL::Function *plMetalTextFontPipelineState::GetVertexFunction(MTL::Library *library) { - return library->newFunction(NS::MakeConstantString("textFontVertexShader")); + return library->newFunction(MTLSTR("textFontVertexShader")); } const MTL::Function *plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library *library) { - return library->newFunction(NS::MakeConstantString("textFontFragmentShader")); + return library->newFunction(MTLSTR("textFontFragmentShader")); } const NS::String *plMetalTextFontPipelineState::GetDescription() { - return NS::MakeConstantString("Font Rendering"); + return MTLSTR("Font Rendering"); } void 
plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h index 6fdc708e34..f5eed85271 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -57,12 +57,12 @@ class plMetalTextFontPipelineState : public plMetalPipelineState { public: plMetalTextFontPipelineState(plMetalDevice* device) : plMetalPipelineState(device){}; - virtual bool IsEqual(const plMetalPipelineState& p) const override; - virtual uint16_t GetID() const override { return 6; }; - virtual plMetalPipelineState* Clone() override; - virtual const MTL::Function* GetVertexFunction(MTL::Library* library) override; - virtual const MTL::Function* GetFragmentFunction(MTL::Library* library) override; - virtual const NS::String* GetDescription() override; + bool IsEqual(const plMetalPipelineState& p) const override; + uint16_t GetID() const override { return 6; }; + plMetalPipelineState* Clone() override; + const MTL::Function* GetVertexFunction(MTL::Library* library) override; + const MTL::Function* GetFragmentFunction(MTL::Library* library) override; + const NS::String* GetDescription() override; void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h index 6ae2329107..0364dd789c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h @@ -52,13 +52,14 @@ class plMetalVertexShader : public plMetalShader { protected: public: - virtual bool ISetConstants(plMetalPipeline* pipe); // On error, sets error string. 
+ bool ISetConstants(plMetalPipeline* pipe) override; // On error, sets error string. plMetalVertexShader(plShader* owner); virtual ~plMetalVertexShader(); - - virtual void Release(); - void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } + + void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalVertexShader* GetNext() { return (plMetalVertexShader*)fNext; } + + void Release() override; }; #endif // plMetalVertexShader_inc From b0428cdec84dc8355cfd4d1865c75a2c5bedfa41 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 3 Nov 2023 20:35:17 -0700 Subject: [PATCH 145/165] Moving some code to static_cast I likely have not found all the C style casts. --- .../pfMetalPipeline/plMetalPipeline.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 050b1ef3a6..570830dd00 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1055,7 +1055,7 @@ void plMetalPipeline::RenderSpans(plDrawableSpans* ice, const std::vectorGetDeviceRef(); + plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); if (mRef == nullptr) { mRef = new plMetalMaterialShaderRef(material, this); @@ -1144,9 +1144,9 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, plProfile_BeginTiming(RenderBuff); - plMetalVertexBufferRef* vRef = (plMetalVertexBufferRef*)vb; - plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)ib; - plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + plMetalVertexBufferRef* vRef = static_cast(vb); + plMetalIndexBufferRef* iRef = static_cast(ib); + plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); mRef->CheckMateralRef(); if (!vRef || 
!vRef->GetBuffer() || !iRef->GetBuffer()) { @@ -1352,7 +1352,7 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM for (k = 0; k < fProjEach.size(); k++) { // Push it's projected texture as a piggyback. plLightInfo* li = fProjEach[k]; - plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); plLayerInterface* proj = li->GetProjection(); hsAssert(proj, "A projector with no texture to project?"); @@ -1449,7 +1449,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) // Now just loop through the aux material, rendering in as many passes as it takes. hsGMaterial* material = aux->fMaterial; - plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); if (mRef == nullptr) { mRef = new plMetalMaterialShaderRef(material, this); @@ -1483,7 +1483,7 @@ void plMetalPipeline::IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux) bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, const plSpan* currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders) { - plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); fCurrLayerIdx = mRef->GetPassIndex(pass); plLayerInterface* lay = material->GetLayer(mRef->GetPassIndex(pass)); @@ -2465,7 +2465,7 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) IPushPiggyBacks(material); // First, do we have a device ref at this index? 
- plMetalMaterialShaderRef* mRef = (plMetalMaterialShaderRef*)material->GetDeviceRef(); + plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); if (mRef == nullptr) { mRef = new plMetalMaterialShaderRef(material, this); @@ -3845,7 +3845,7 @@ void plMetalPipeline::ISetupShadowRcvTextureStages(hsGMaterial* mat) { // Do this first, this normally stomps all over our uniforms // FIXME: Way to encode layers without stomping all over uniforms? - plMetalMaterialShaderRef* matShader = (plMetalMaterialShaderRef*)mat->GetDeviceRef(); + plMetalMaterialShaderRef* matShader = static_cast(mat->GetDeviceRef()); // matShader->encodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, 0, 0, nullptr); // We're whacking about with renderstate independent of current material, From 7bc5803e698477f6385f338c6a8a1a83b023b164 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 3 Nov 2023 23:54:57 -0700 Subject: [PATCH 146/165] Fixing bundle identifier warning --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 515560534c..273ac98269 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -177,6 +177,7 @@ if(APPLE) XCODE_ATTRIBUTE_CODE_SIGN_ENTITLEMENTS "${CMAKE_CURRENT_SOURCE_DIR}/Mac-Cocoa/plClient.entitlements" XCODE_ATTRIBUTE_ENABLE_HARDENED_RUNTIME "YES" XCODE_ATTRIBUTE_CLANG_ENABLE_OBJC_ARC "YES" + XCODE_ATTRIBUTE_PRODUCT_BUNDLE_IDENTIFIER org.Huru.UruExplorer ) target_compile_options(plClient PRIVATE -fobjc-arc) target_sources(plClient PRIVATE Mac-Cocoa/Assets.xcassets) From d9f0cb219a405b9c91cc08d10cdfce25e1e9cae3 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 4 Nov 2023 00:19:56 -0700 Subject: [PATCH 147/165] Removing delegate message filter for window size change. 
This was preventing a delegate message from properly setting Plasma resolution on launch. --- Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm index ef2c973705..a2a89b74d7 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSView.mm @@ -276,12 +276,6 @@ - (void)resizeDrawable:(CGFloat)scaleFactor } #if PLASMA_PIPELINE_METAL - if (newSize.width == _metalLayer.drawableSize.width && - newSize.height == _metalLayer.drawableSize.height) - { - return; - } - _metalLayer.drawableSize = newSize; #endif [self.delegate renderView:self From 9568467cb966977d61570291b6b92c23b596bb23 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 4 Nov 2023 13:48:04 -0700 Subject: [PATCH 148/165] First set of feedback changes --- .../Plasma/Apps/plClient/Mac-Cocoa/main.mm | 16 +- .../pfMetalPipeline/ShaderSrc/Clear.metal | 10 +- .../ShaderSrc/FixedPipelineShaders.metal | 54 +++-- .../ShaderSrc/GammaCorrection.metal | 8 +- .../pfMetalPipeline/ShaderSrc/Grass.metal | 6 +- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 4 +- .../ShaderSrc/TextFontShader.metal | 2 +- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 2 +- .../pfMetalPipeline/plMetalDevice.cpp | 97 +++++---- .../pfMetalPipeline/plMetalDevice.h | 2 +- .../pfMetalPipeline/plMetalDeviceRef.cpp | 78 -------- .../pfMetalPipeline/plMetalDeviceRef.h | 63 +++--- .../pfMetalPipeline/plMetalDeviceRefs.cpp | 7 +- .../pfMetalPipeline/plMetalEnumerate.h | 13 -- .../pfMetalPipeline/plMetalFragmentShader.cpp | 4 +- .../pfMetalPipeline/plMetalFragmentShader.h | 4 +- .../plMetalMaterialShaderRef.cpp | 46 +++-- .../plMetalMaterialShaderRef.h | 31 ++- .../pfMetalPipeline/plMetalPipeline.cpp | 187 ++++++++---------- .../pfMetalPipeline/plMetalPipelineState.cpp | 34 ++-- .../pfMetalPipeline/plMetalPipelineState.h | 53 +++-- 
.../pfMetalPipeline/plMetalPlateManager.cpp | 24 +-- .../pfMetalPipeline/plMetalPlateManager.h | 20 +- .../pfMetalPipeline/plMetalShader.cpp | 17 +- .../pfMetalPipeline/plMetalShader.h | 10 +- 25 files changed, 345 insertions(+), 447 deletions(-) delete mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp delete mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index 278761d7a4..c859146d27 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -42,10 +42,10 @@ // System Frameworks #import -#if PLASMA_PIPELINE_GL +#ifdef PLASMA_PIPELINE_GL #import #endif -#if PLASMA_PIPELINE_METAL +#ifdef PLASMA_PIPELINE_METAL #import #endif #import @@ -69,11 +69,11 @@ #include "plCmdParser.h" #include "pfConsoleCore/pfConsoleEngine.h" #include "pfGameGUIMgr/pfGameGUIMgr.h" -#if PLASMA_PIPELINE_GL +#ifdef PLASMA_PIPELINE_GL #include "pfGLPipeline/plGLPipeline.h" #endif #include "plInputCore/plInputDevice.h" -#if PLASMA_PIPELINE_METAL +#ifdef PLASMA_PIPELINE_METAL #include "pfMetalPipeline/plMetalPipeline.h" #endif #include "plMessage/plDisplayScaleChangedMsg.h" @@ -468,12 +468,12 @@ - (void)startClient gClient.SetClientWindow((hsWindowHndl)(__bridge void*)self.window); gClient.SetClientDisplay((hsWindowHndl)NULL); -#if PLASMA_PIPELINE_METAL +#ifdef PLASMA_PIPELINE_METAL plMetalPipeline *pipeline = (plMetalPipeline *)gClient->GetPipeline(); pipeline->currentDrawableCallback = [self] (MTL::Device* device) { id< CAMetalDrawable > drawable; id metalDevice = (__bridge id)device; - if(((CAMetalLayer *) _renderLayer).device != metalDevice) { + if (((CAMetalLayer *) _renderLayer).device != metalDevice) { ((CAMetalLayer *) _renderLayer).device = metalDevice; dispatch_async(dispatch_get_main_queue(), ^{ [self updateWindowTitle]; @@ -488,12 +488,12 @@ - (void)startClient if 
(!gClient) { exit(0); } - + self.eventMonitor = [[PLSKeyboardEventMonitor alloc] initWithView:self.window.contentView inputManager:&gClient]; ((PLSView*)self.window.contentView).inputManager = gClient->GetInputManager(); [self.window makeFirstResponder:self.window.contentView]; - + // Main loop if (gClient && !gClient->GetDone()) { [self startRunLoop]; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal index 5bc3c0cbb4..7669bdc516 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal @@ -70,16 +70,14 @@ vertex ClearVertexOut clearVertex(ClearVertexIn in [[ stage_in ]]) return out; } -fragment ClearFragmentOut clearFragment( - constant half4& clearColor [[ buffer(0), function_constant(ShouldClearColor) ]], - constant float& clearDepth [[ buffer(1), function_constant(ShouldClearDepth) ]] - ) +fragment ClearFragmentOut clearFragment(constant half4& clearColor [[ buffer(0), function_constant(ShouldClearColor) ]], + constant float& clearDepth [[ buffer(1), function_constant(ShouldClearDepth) ]]) { ClearFragmentOut out; - if(ShouldClearDepth) { + if (ShouldClearDepth) { out.depth = clearDepth; } - if(ShouldClearColor) { + if (ShouldClearColor) { out.color = clearColor; } return out; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index e6c310aca1..2fb497d31e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -138,7 +138,7 @@ struct FragmentShaderArguments { texturecube cubicTexture8 [[ texture(FragmentShaderArgumentAttributeCubicTextures + 7), function_constant(hasCubicTexture8) ]]; const constant plMetalFragmentShaderArgumentBuffer* 
bufferedUniforms [[ buffer(FragmentShaderArgumentUniforms) ]]; half4 sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const; - //number of layers is variable, so have to declare these samplers the ugly way + // number of layers is variable, so have to declare these samplers the ugly way sampler samplers [[ sampler(0), function_constant(hasLayer1) ]]; sampler sampler2 [[ sampler(1), function_constant(hasLayer2) ]]; sampler sampler3 [[ sampler(2), function_constant(hasLayer3) ]]; @@ -160,10 +160,8 @@ typedef struct float3 texCoord6 [[function_constant(hasLayer6)]]; float3 texCoord7 [[function_constant(hasLayer7)]]; float3 texCoord8 [[function_constant(hasLayer8)]]; - //float4 normal; half4 vtxColor [[ centroid_perspective ]]; half4 fogColor; - //float4 vCamNormal; } ColorInOut; @@ -179,13 +177,12 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant float4x4 & blendMatrix1 [[ buffer(VertexShaderArgumentBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) { ColorInOut out; - //we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. + // we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. 
const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.0f); const half3 MAmbient = mix(inColor.rgb, uniforms.ambientCol, uniforms.ambientSrc); const half4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); const half3 MEmissive = mix(inColor.rgb, uniforms.emissiveCol, uniforms.emissiveSrc); - //const half4 MSpecular = half4(mix(inColor, uniforms.specularCol, uniforms.specularSrc)); half3 LAmbient = half3(0.0, 0.0, 0.0); half3 LDiffuse = half3(0.0, 0.0, 0.0); @@ -193,17 +190,17 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); - if(temp_hasOnlyWeight1) { + if (temp_hasOnlyWeight1) { const float4 position2 = blendMatrix1 * float4(in.position, 1.0); position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); } for (size_t i = 0; i < lights.count; i++) { constant const plMetalShaderLightSource *lightSource = &lights.lampSources[i]; - if(lightSource->scale == 0.0h) + if (lightSource->scale == 0.0h) continue; - //w is attenation + // w is attenation float4 direction; if (lightSource->position.w == 0.0) { @@ -220,9 +217,9 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], if (lightSource->spotProps.x > 0.0) { // Spot Light with cone falloff const float theta = dot(direction.xyz, normalize(-lightSource->direction).xyz); - //inner cutoff + // inner cutoff const float gamma = lightSource->spotProps.y; - //outer cutoff + // outer cutoff const float phi = lightSource->spotProps.z; const float epsilon = (gamma - phi); const float intensity = clamp((theta - phi) / epsilon, 0.0, 1.0); @@ -243,14 +240,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], out.vtxColor = half4(material.rgb, abs(uniforms.invVtxAlpha - MDiffuse.a)); const float4 vCamPosition = position * uniforms.worldToCameraMatrix; - 
//out.vCamNormal = uniforms.worldToCameraMatrix * (uniforms.localToWorldMatrix * float4(in.position, 0.0)); - //Fog + // Fog out.fogColor = uniforms.calcFog(vCamPosition); const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; - for(size_t layer=0; layeralphaThreshold) { discard_fragment(); } + if (currentColor.a < fragmentShaderArgs.bufferedUniforms->alphaThreshold) { + discard_fragment(); + } return currentColor; } @@ -516,7 +514,7 @@ constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t b srcSample.a = srcSample.a; } - switch( blendFlags & ( kBlendAlphaAdd | kBlendAlphaMult ) ) { + switch (blendFlags & ( kBlendAlphaAdd | kBlendAlphaMult )) { case 0: destSample.a = destSample.a; break; @@ -642,12 +640,12 @@ vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], const float4 vCamPosition = position * uniforms.worldToCameraMatrix; - //Fog + // Fog out.fogColor = uniforms.calcFog(vCamPosition); const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; - for(size_t layer=0; layer 0) { + // only possible alpha sources are layers 0 or 1 + if (alphaSrc == 0 && passCount > 0) { half4 layerColor = layers.sampleLayer(0, in.vtxColor,sourceTypes[0], in.texCoord3); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; - } else if(alphaSrc == 1 && passCount > 1) { + } else if (alphaSrc == 1 && passCount > 1) { half4 layerColor = layers.sampleLayer(1, in.vtxColor, sourceTypes[1], in.texCoord3); @@ -694,9 +692,9 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], currentColor.rgb *= in.vtxColor.a; } - //alpha blend goes here + // alpha blend goes here - if(currentColor.a <= 0.0h) + if (currentColor.a <= 0.0h) discard_fragment(); return currentColor; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal index 16f6d8ae92..63000b5e69 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal @@ -69,11 +69,9 @@ const constant sampler lutSampler = sampler( filter::nearest ); -fragment half4 gammaCorrectFragment( - GammaVertexOut in [[stage_in]], - texture2d inputTexture [[texture(0)]], - texture1d_array LUT [[texture(1)]] - ) +fragment half4 gammaCorrectFragment(GammaVertexOut in [[stage_in]], + texture2d inputTexture [[texture(0)]], + texture1d_array LUT [[texture(1)]]) { float4 color = inputTexture.read(ushort2(in.position.xy)); return { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal index 032b186493..c3beb37f24 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal @@ -10,8 +10,8 @@ using namespace metal; #include "ShaderVertex.h" -//ignoring the int and pi constants here and using whats built in -//but reserving space for them in the buffer +// ignoring the int and pi constants here and using whats built in +// but reserving space for them in the buffer typedef struct { matrix_float4x4 Local2NDC; float4 intConstants; @@ -80,7 +80,7 @@ fragment half4 ps_GrassShader(vs_GrassInOut in [[stage_in]], half4 out = t0.sample(colorSampler, in.texCoord.xy); out *= half4(in.color); - if(out.a <= 0.1) + if (out.a <= 0.1) discard_fragment(); return out; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 0269390866..e0375ed9a9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -152,13 +152,13 @@ typedef struct 
typedef struct { - //transformation + // transformation matrix_float4x4 projectionMatrix; matrix_float4x4 localToWorldMatrix; matrix_float4x4 cameraToWorldMatrix; matrix_float4x4 worldToCameraMatrix; - //lighting + // lighting half4 globalAmb; half3 ambientCol; uint8_t ambientSrc; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal index 8d07e91daa..89513a03eb 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal @@ -85,7 +85,7 @@ vertex ColorInOut textFontVertexShader(constant Vertex *in [[ buffer(0) ]], } fragment half4 textFontFragmentShader(ColorInOut in [[stage_in]], - texture2d colorMap [[ texture(0) ]]) + texture2d colorMap [[ texture(0) ]]) { constexpr sampler colorSampler(mip_filter::nearest, mag_filter::nearest, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index a587a56473..04998c3d8e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -349,7 +349,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], float3 D = r5.xyz; float3 F = uniforms.EnvAdjust.xyz; float G = uniforms.EnvAdjust.w; - //METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt + // METAL NOTE: HLSL 1.1 always applies an abs operation to values it's about to sqrt float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(abs(dot(D.xyz, F.xyz)), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 1c8371d743..8f14e4fc4d 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -40,27 +40,26 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com *==LICENSE==*/ -#ifndef plMetalDevice_hpp -#define plMetalDevice_hpp - -// We need to define these once for Metal somewhere in a cpp file +// We need to define these once and only once for Metal somewhere +// in a cpp file before the Metal-cpp include (via plMetalDevice) #define NS_PRIVATE_IMPLEMENTATION #define CA_PRIVATE_IMPLEMENTATION #define MTL_PRIVATE_IMPLEMENTATION #include "plMetalDevice.h" -#include - -#include "ShaderTypes.h" +#include "hsDarwin.h" #include "hsThread.h" + #include "plDrawable/plGBufferGroup.h" #include "plGImage/plCubicEnvironmap.h" #include "plGImage/plMipmap.h" -#include "plMetalPipeline.h" -#include "plMetalPipelineState.h" #include "plPipeline/plRenderTarget.h" +#include "pfMetalPipeline/plMetalPipeline.h" +#include "pfMetalPipeline/plMetalPipelineState.h" +#include "pfMetalPipeline/ShaderSrc/ShaderTypes.h" + matrix_float4x4* hsMatrix2SIMD(const hsMatrix44& src, matrix_float4x4* dst) { if (src.fFlags & hsMatrix44::kIsIdent) { @@ -215,9 +214,7 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh void plMetalDevice::BeginNewRenderPass() { - // printf("Beginning new render pass\n"); - - // lazilly create the screen render encoder if it does not yet exist + // lazily create the screen render encoder if it does not yet exist if (!fCurrentOffscreenCommandBuffer && !fCurrentRenderTargetCommandEncoder) { SetRenderTarget(nullptr); } @@ -228,7 +225,7 @@ void plMetalDevice::BeginNewRenderPass() // pass descriptor and submit more commands fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); - fCurrentRenderTargetCommandEncoder = nil; + fCurrentRenderTargetCommandEncoder = nullptr; } MTL::RenderPassDescriptor* renderPassDescriptor =
MTL::RenderPassDescriptor::renderPassDescriptor(); @@ -309,7 +306,7 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) // pass descriptor and submit more commands fCurrentRenderTargetCommandEncoder->endEncoding(); fCurrentRenderTargetCommandEncoder->release(); - fCurrentRenderTargetCommandEncoder = nil; + fCurrentRenderTargetCommandEncoder = nullptr; } if (fCurrentOffscreenCommandBuffer) { @@ -326,7 +323,7 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) fCurrentOffscreenCommandBuffer->waitUntilCompleted(); } fCurrentOffscreenCommandBuffer->release(); - fCurrentOffscreenCommandBuffer = nil; + fCurrentOffscreenCommandBuffer = nullptr; } fCurrentRenderTarget = target; @@ -359,23 +356,23 @@ void plMetalDevice::SetRenderTarget(plRenderTarget* target) } plMetalDevice::plMetalDevice() - : fErrorMsg(nullptr), + : fErrorMsg(), fActiveThread(hsThread::ThisThreadHash()), - fCurrentDrawable(nullptr), - fCommandQueue(nullptr), - fCurrentRenderTargetCommandEncoder(nullptr), - fCurrentDrawableDepthTexture(nullptr), - fCurrentFragmentOutputTexture(nullptr), - fCurrentCommandBuffer(nullptr), - fCurrentOffscreenCommandBuffer(nullptr), - fCurrentRenderTarget(nullptr), + fCurrentDrawable(), + fCommandQueue(), + fCurrentRenderTargetCommandEncoder(), + fCurrentDrawableDepthTexture(), + fCurrentFragmentOutputTexture(), + fCurrentCommandBuffer(), + fCurrentOffscreenCommandBuffer(), + fCurrentRenderTarget(), fNewPipelineStateMap(), - fCurrentFragmentMSAAOutputTexture(nullptr), - fCurrentUnprocessedOutputTexture(nullptr), - fGammaLUTTexture(nullptr), - fGammaAdjustState(nullptr), - fBlitCommandBuffer(nullptr), - fBlitCommandEncoder(nullptr) + fCurrentFragmentMSAAOutputTexture(), + fCurrentUnprocessedOutputTexture(), + fGammaLUTTexture(), + fGammaAdjustState(), + fBlitCommandBuffer(), + fBlitCommandEncoder() { fClearRenderTargetColor = {0.0, 0.0, 0.0, 1.0}; fClearDrawableColor = {0.0, 0.0, 0.0, 1.0}; @@ -541,7 +538,7 @@ void 
plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr /// Interleaved, do straight copy memcpy(ptr, srcVPtr + cell->fVtxStart, cell->fLength * vertSize); ptr += cell->fLength * vertSize; - assert(size <= cell->fLength * vertSize); + hsAssert(size <= cell->fLength * vertSize, "Interleaved copy size mismatch"); } else { hsStatusMessage("Non interleaved data"); @@ -714,11 +711,11 @@ void plMetalDevice::ReleaseFramebufferObjects() { if (fCurrentUnprocessedOutputTexture) fCurrentUnprocessedOutputTexture->release(); - fCurrentFragmentOutputTexture = nil; + fCurrentFragmentOutputTexture = nullptr; if (fGammaAdjustState) fGammaAdjustState->release(); - fGammaAdjustState = nil; + fGammaAdjustState = nullptr; } void plMetalDevice::SetFramebufferFormat(MTL::PixelFormat format) @@ -811,7 +808,7 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef* tRef, plMipmap* i }; RGBA4444Component* in = (RGBA4444Component*)img->GetCurrLevelPtr(); - simd_uint4* out = (simd_uint4*)malloc(img->GetCurrHeight() * img->GetCurrWidth() * 4); + auto out = std::make_unique(img->GetCurrHeight() * img->GetCurrWidth()); for (int i = 0; i < (img->GetCurrWidth() * img->GetCurrHeight()); i++) { out[i].r = in[i].r; @@ -820,18 +817,19 @@ void plMetalDevice::PopulateTexture(plMetalDevice::TextureRef* tRef, plMipmap* i out[i].a = in[i].a; } - tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, out, img->GetCurrWidth() * 4, 0); - - free(out); + tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, out.get(), img->GetCurrWidth() * 4, 0); } else { tRef->fTexture->replaceRegion(MTL::Region::Make2D(0, 0, img->GetCurrWidth(), img->GetCurrHeight()), img->GetCurrLevel(), slice, img->GetCurrLevelPtr(), img->GetCurrWidth() * 4, 0); } } else { - printf("Texture with no image data?\n"); + hsAssert(0, "Texture with no image data?\n"); } } } 
- tRef->fTexture->setLabel(NS::String::string(img->GetKeyName().c_str(), NS::UTF8StringEncoding)); + + CFStringRef name = CFStringCreateWithSTString(img->GetKeyName()); + tRef->fTexture->setLabel(reinterpret_cast(name)); + CFRelease(name); tRef->SetDirty(false); } @@ -882,7 +880,7 @@ void plMetalDevice::MakeCubicTextureRef(plMetalDevice::TextureRef* tRef, plCubic tRef->fTexture = fMetalDevice->newTexture(descriptor); - static const uint kFaceMapping[] = { + static constexpr uint kFaceMapping[] = { 1, // kLeftFace 0, // kRightFace 4, // kFrontFace @@ -993,13 +991,12 @@ void plMetalDevice::CreateNewCommandBuffer(CA::MetalDrawable* drawable) void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable** condOut) { - __block std::condition_variable* newCondition = new std::condition_variable(); - fConditionMap[record] = newCondition; + fConditionMap[record] = new std::condition_variable(); if (condOut) { - *condOut = newCondition; + *condOut = fConditionMap[record]; } - if (fNewPipelineStateMap[record] != NULL) { + if (fNewPipelineStateMap[record] != nullptr) { // The shader is already compiled. 
return; } @@ -1042,7 +1039,7 @@ void plMetalDevice::StartPipelineBuild(plMetalPipelineRecord& record, std::condi fNewPipelineStateMap[record] = linkedPipeline; // signal that we're done - newCondition->notify_all(); + fConditionMap[record]->notify_all(); } }); @@ -1156,10 +1153,10 @@ void plMetalDevice::SubmitCommandBuffer() fCurrentCommandBuffer->presentDrawable(fCurrentDrawable); fCurrentCommandBuffer->commit(); fCurrentCommandBuffer->release(); - fCurrentCommandBuffer = nil; + fCurrentCommandBuffer = nullptr; fCurrentDrawable->release(); - fCurrentDrawable = nil; + fCurrentDrawable = nullptr; // Reset the clear colors for the next pass // Metal clears on framebuffer load - so don't cause a clear @@ -1225,9 +1222,9 @@ void plMetalDevice::PostprocessIntoDrawable() gammaAdjustEncoder->endEncoding(); } -std::size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept +size_t plMetalDevice::plMetalPipelineRecordHashFunction ::operator()(plMetalPipelineRecord const& s) const noexcept { - std::size_t value = std::hash()(s.depthFormat); + size_t value = std::hash()(s.depthFormat); value ^= std::hash()(s.colorFormat); value ^= std::hash()(*s.state); value ^= std::hash()(s.sampleCount); @@ -1276,5 +1273,3 @@ void plMetalDevice::BlitTexture(MTL::Texture* src, MTL::Texture* dst) fBlitCommandEncoder->copyFromTexture(src, 0, 0, MTL::Origin(0, 0, 0), MTL::Size(src->width(), src->height(), 0), dst, 0, 0, MTL::Origin(0, 0, 0)); } - -#endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 8abaeef984..84767ca168 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -246,7 +246,7 @@ class plMetalDevice MTL::CommandBuffer* fBlitCommandBuffer; MTL::BlitCommandEncoder* fBlitCommandEncoder; - bool NeedsPostprocessing() + bool NeedsPostprocessing() const { return 
fGammaLUTTexture != nullptr; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp deleted file mode 100644 index 373043be70..0000000000 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.cpp +++ /dev/null @@ -1,78 +0,0 @@ -// -// plMetalDeviceRef.cpp -// CoreLib -// -// Created by Colin Cornaby on 11/10/21. -// - -#include "plMetalDeviceRef.h" - -void plMetalDeviceRef::Unlink() -{ - hsAssert(fBack, "plGLDeviceRef not in list"); - - if (fNext) - fNext->fBack = fBack; - *fBack = fNext; - - fBack = nullptr; - fNext = nullptr; -} - -void plMetalDeviceRef::Link(plMetalDeviceRef **back) -{ - hsAssert(fNext == nullptr && fBack == nullptr, "Trying to link a plMetalDeviceRef that's already linked"); - - fNext = *back; - if (*back) - (*back)->fBack = &fNext; - fBack = back; - *back = this; -} - -plMetalDeviceRef::~plMetalDeviceRef() -{ - if (fNext != nullptr || fBack != nullptr) - Unlink(); -} - -plMetalVertexBufferRef::~plMetalVertexBufferRef() -{ - Release(); -} - -void plMetalVertexBufferRef::Release() -{ - SetDirty(true); -} - -plMetalTextureRef::~plMetalTextureRef() -{ - // fTexture->release(); - Release(); -} - -void plMetalTextureRef::Release() -{ - SetDirty(true); -} - -plMetalIndexBufferRef::~plMetalIndexBufferRef() -{ - Release(); -} - -void plMetalIndexBufferRef::Release() -{ - SetDirty(true); -} - -plMetalRenderTargetRef::~plMetalRenderTargetRef() -{ - Release(); -} - -void plMetalRenderTargetRef::Release() -{ - SetDirty(true); -} diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index e30df0db03..c7608d6aad 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -80,11 +80,20 @@ class plMetalDeviceRef : public hsGDeviceRef }; /* - The buffer pool stores and recycles buffers so that Plasma 
can encode GPU commands and render in parallel. That means we can't touch buffers the GPU is using, and if a pass or frame rewrites a buffer we have to make sure it's not stomping on something that is already attached to a frame. Because Metal can triple buffer, the first dimension of caching is hard coded to 3. Some ages will also rewrite buffers an unspecified number of times between render passes. For example: A reflection render and a main render might have different index buffers. So the second dimension of caching uses an unbounded vector that will hold enough buffers to render in any one age. - - Buffer pools do not allocate buffers, they only store them. The outside caller is responsible for allocating a buffer and then setting it. The buffer pool will retain any buffers within the pool, and automatically release them when they are overwritten or the pool is deallocated. - - Because buffers are only stored on write, and no allocations happen within the pool, overhead is kept low for static buffers. Completely static buffers will never expand the pool if they only write once. + The buffer pool stores and recycles buffers so that Plasma can encode GPU commands and render in + parallel. That means we can't touch buffers the GPU is using, and if a pass or frame rewrites a + buffer we have to make sure it's not stomping on something that is already attached to a frame. + Because Metal can triple buffer, the first dimension of caching is hard coded to 3. Some ages + will also rewrite buffers an unspecified number of times between render passes. For example: A + reflection render and a main render might have different index buffers. So the second dimension of + caching uses an unbounded vector that will hold enough buffers to render in any one age. + + Buffer pools do not allocate buffers, they only store them. The outside caller is responsible for + allocating a buffer and then setting it.
The buffer pool will retain any buffers within the pool, + and automatically release them when they are overwritten or the pool is deallocated. + + Because buffers are only stored on write, and no allocations happen within the pool, overhead is + kept low for static buffers. Completely static buffers will never expand the pool if they only write once. */ class plMetalBufferPoolRef : public plMetalDeviceRef { @@ -94,10 +103,10 @@ class plMetalBufferPoolRef : public plMetalDeviceRef uint32_t fLastWriteFrameTime; plMetalBufferPoolRef() : plMetalDeviceRef(), - fLastWriteFrameTime(0), - fCurrentPass(0), - fCurrentFrame(0), - fBuffer(nullptr) + fLastWriteFrameTime(), + fCurrentPass(), + fCurrentFrame(), + fBuffer() { } @@ -124,7 +133,7 @@ class plMetalBufferPoolRef : public plMetalDeviceRef static void SetFrameTime(uint32_t frameTime) { fFrameTime = frameTime; }; - MTL::Buffer* GetBuffer() { return fBuffer; }; + MTL::Buffer* GetBuffer() const { return fBuffer; }; void SetBuffer(MTL::Buffer* buffer) { @@ -189,21 +198,21 @@ class plMetalVertexBufferRef : public plMetalBufferPoolRef void SetRefTime(uint32_t t) { fRefTime = t; } plMetalVertexBufferRef() : plMetalBufferPoolRef(), - fCount(0), - fIndex(0), - fVertexSize(0), - fOffset(0), - fOwner(nullptr), - fData(nullptr), - fFormat(0), - fRefTime(0) + fCount(), + fIndex(), + fVertexSize(), + fOffset(), + fOwner(), + fData(), + fFormat(), + fRefTime() { } virtual ~plMetalVertexBufferRef(); void Link(plMetalVertexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalVertexBufferRef* GetNext() { return (plMetalVertexBufferRef*)fNext; } + plMetalVertexBufferRef* GetNext() const { return (plMetalVertexBufferRef*)fNext; } void Release() override; }; @@ -239,11 +248,11 @@ class plMetalIndexBufferRef : public plMetalBufferPoolRef virtual ~plMetalIndexBufferRef(); plMetalIndexBufferRef() : plMetalBufferPoolRef(), - fCount(0), - fIndex(0), - fRefTime(0), - fLastWriteFrameTime(0), - fOwner(nullptr) + 
fCount(), + fIndex(), + fRefTime(), + fLastWriteFrameTime(), + fOwner() { } }; @@ -258,11 +267,11 @@ class plMetalTextureRef : public plMetalDeviceRef MTL::PixelFormat fFormat; void Link(plMetalTextureRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalTextureRef* GetNext() { return (plMetalTextureRef*)fNext; } + plMetalTextureRef* const GetNext() { return (plMetalTextureRef*)fNext; } plMetalTextureRef() : plMetalDeviceRef(), - fOwner(nullptr), - fTexture(nullptr), + fOwner(), + fTexture(), fLevels(1) { } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp index 81233adbdc..87d4f6181a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRefs.cpp @@ -53,8 +53,8 @@ plProfile_CreateMemCounter("Textures", "Memory", MemTexture); ** Generic plGLDeviceRef Functions ** *****************************************************************************/ plMetalDeviceRef::plMetalDeviceRef() - : fNext(nullptr), - fBack(nullptr) + : fNext(), + fBack() { } @@ -95,8 +95,7 @@ void plMetalDeviceRef::Link(plMetalDeviceRef **back) plMetalVertexBufferRef::~plMetalVertexBufferRef() { - if (fData) - delete fData; + delete fData; Release(); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h deleted file mode 100644 index 3628b91368..0000000000 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.h +++ /dev/null @@ -1,13 +0,0 @@ -// -// plMetalEnumerate.hpp -// pfMetalPipeline -// -// Created by Colin Cornaby on 3/20/22. 
-// - -#ifndef plMetalEnumerate_hpp -#define plMetalEnumerate_hpp - -#include - -#endif /* plMetalEnumerate_hpp */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp index a3f84bf7ff..8b0c886033 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp @@ -61,9 +61,7 @@ plMetalFragmentShader::~plMetalFragmentShader() void plMetalFragmentShader::Release() { - fPipe = nil; - - // ISetError(nil); + fPipe = nullptr; } bool plMetalFragmentShader::ISetConstants(plMetalPipeline* pipe) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h index 427469a7ee..10a1ccbcae 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h @@ -56,9 +56,9 @@ class plMetalFragmentShader : public plMetalShader plMetalFragmentShader(plShader* owner); virtual ~plMetalFragmentShader(); - void Release() override; + void Release() override; void Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalFragmentShader* GetNext() { return (plMetalFragmentShader*)fNext; } + plMetalFragmentShader* const GetNext() { return (plMetalFragmentShader*)fNext; } }; #endif // plMetalFragmentShader_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index ddf4568e31..4b0498e613 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -42,11 +42,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plMetalMaterialShaderRef.h" -#include - #include "HeadSpin.h" #include "hsBitVector.h" #include "hsGMatState.inl" + #include "plDrawable/plGBufferGroup.h" #include "plGImage/plCubicEnvironmap.h" #include "plGImage/plMipmap.h" @@ -59,10 +58,12 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "plSurface/hsGMaterial.h" #include "plSurface/plLayerInterface.h" -plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe) : fPipeline{pipe}, - fMaterial{mat}, +#include + +plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe) : fPipeline(pipe), + fMaterial(mat), fFragFunction(), - fNumPasses(0) + fNumPasses() { fDevice = pipe->fDevice.fMetalDevice; fFragFunction = pipe->fFragFunction; @@ -78,7 +79,7 @@ void plMetalMaterialShaderRef::Release() { for (auto& buffer : fPassArgumentBuffers) { buffer->release(); - buffer = nil; + buffer = nullptr; } fPassArgumentBuffers.clear(); @@ -154,7 +155,7 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder* en // continue; // } - assert(i - GetPassIndex(pass) >= 0); + hsAssert(i - GetPassIndex(pass) >= 0, "Bad pass index during encode"); EncodeTransform(layer, &vertexUniforms->uvTransforms[i - GetPassIndex(pass)]); IBuildLayerTexture(encoder, i - GetPassIndex(pass), layer); } @@ -162,7 +163,12 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder* en encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, FragmentShaderArgumentUniforms); } -void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass, plMetalFragmentShaderDescription* passDescription, std::vector* piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform) +void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encoder, + VertexUniforms* vertexUniforms, uint pass, + 
plMetalFragmentShaderDescription* passDescription, + std::vector* piggyBacks, + std::function preEncodeTransform, + std::function postEncodeTransform) { std::vector layers = GetLayersForPass(pass); @@ -174,7 +180,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encode IHandleMaterial( GetPassIndex(pass), passDescription, &uniforms, piggyBacks, - [&](plLayerInterface* layer, uint32_t index) { + [this, &preEncodeTransform, &encoder, &pass, &vertexUniforms](plLayerInterface* layer, uint32_t index) { layer = preEncodeTransform(layer, index); IBuildLayerTexture(encoder, index, layer); @@ -184,10 +190,8 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encode EncodeTransform(layer, &vertexUniforms->uvTransforms[index]); return layer; - }, - [&](plLayerInterface* layer, uint32_t index) { - layer = postEncodeTransform(layer, index); - return layer; + }, [&postEncodeTransform](plLayerInterface* layer, uint32_t index) { + return postEncodeTransform(layer, index); }); encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), FragmentShaderArgumentUniforms); @@ -253,8 +257,7 @@ void plMetalMaterialShaderRef::ILoopOverLayers() currLayer, &passDescription, layerBuffer, nullptr, [](plLayerInterface* layer, uint32_t index) { return layer; - }, - [](plLayerInterface* layer, uint32_t index) { + }, [](plLayerInterface* layer, uint32_t index) { return layer; }); @@ -291,7 +294,7 @@ void plMetalMaterialShaderRef::ILoopOverLayers() } } -const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerInterface* layer) +const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerInterface* layer) const { hsGMatState state; state.Composite(layer->GetState(), fPipeline->GetMaterialOverride(true), fPipeline->GetMaterialOverride(false)); @@ -402,7 +405,12 @@ bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay) return true; } -uint32_t 
plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription* passDescription, plMetalFragmentShaderArgumentBuffer* uniforms, std::vector* piggybacks, const std::function& preEncodeTransform, const std::function& postEncodeTransform) +uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, + plMetalFragmentShaderDescription* passDescription, + plMetalFragmentShaderArgumentBuffer* uniforms, + std::vector* piggybacks, + const std::function& preEncodeTransform, + const std::function& postEncodeTransform) { if (!fMaterial || layer >= fMaterial->GetNumLayers() || !fMaterial->GetLayer(layer)) { return -1; @@ -422,7 +430,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme // Ignoring the bit about self-rendering cube maps - plLayerInterface* currLay = /*IPushOverBaseLayer*/ fMaterial->GetLayer(layer); + plLayerInterface* currLay = fMaterial->GetLayer(layer); currLay = preEncodeTransform(currLay, 0); if (fPipeline->IsDebugFlagSet(plPipeDbg::kFlagBumpW) && (currLay->GetMiscFlags() & hsGMatState::kMiscBumpDu)) { @@ -501,7 +509,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragme } } - passDescription->numLayers = (piggybacks ? piggybacks->size() : 0) + currNumLayers; + passDescription->fNumLayers = (piggybacks ? 
piggybacks->size() : 0) + currNumLayers; if (state.fBlendFlags & (hsGMatState::kBlendTest | hsGMatState::kBlendAlpha | hsGMatState::kBlendAddColorTimesAlpha) && !(state.fBlendFlags & hsGMatState::kBlendAlphaAlways)) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index e1830ba7a6..84b7eea0e4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -70,7 +70,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef public: void Link(plMetalMaterialShaderRef **back) { plMetalDeviceRef::Link((plMetalDeviceRef **)back); } - plMetalMaterialShaderRef *GetNext() { return (plMetalMaterialShaderRef *)fNext; } + plMetalMaterialShaderRef* GetNext() const { return (plMetalMaterialShaderRef *)fNext; } plMetalMaterialShaderRef(hsGMaterial *mat, plMetalPipeline *pipe); ~plMetalMaterialShaderRef(); @@ -81,9 +81,15 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef uint32_t GetNumPasses() const { return fNumPasses; } uint32_t GetPassIndex(size_t which) const { return fPassIndices[which]; } - const std::vector GetLayersForPass(size_t pass) { return fPasses[pass]; } - - void EncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass, plMetalFragmentShaderDescription *passDescription, std::vector *piggyBacks, std::function preEncodeTransform, std::function postEncodeTransform); + const std::vector GetLayersForPass(size_t pass) const { return fPasses[pass]; } + + void EncodeArguments(MTL::RenderCommandEncoder *encoder, + VertexUniforms *vertexUniforms, + uint pass, + plMetalFragmentShaderDescription *passDescription, + std::vector *piggyBacks, + std::function preEncodeTransform, + std::function postEncodeTransform); void FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass); // probably 
not a good idea to call prepareTextures directly // mostly just a hack to keep plates working for now @@ -93,9 +99,9 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef // Set the current Plasma state based on the input layer state and the material overrides. // fMatOverOn overrides to set a state bit whether it is set in the layer or not. // fMatOverOff overrides to clear a state bit whether it is set in the layer or not.s - const hsGMatState ICompositeLayerState(const plLayerInterface *layer); + const hsGMatState ICompositeLayerState(const plLayerInterface *layer) const; - const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) + const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) const { return fFragmentShaderDescriptions[which]; } @@ -104,13 +110,18 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void ILoopOverLayers(); uint32_t fNumPasses; - uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription *passDescription, plMetalFragmentShaderArgumentBuffer *uniforms, std::vector *piggybacks, const std::function& preEncodeTransform, const std::function& postEncodeTransform); + uint32_t IHandleMaterial(uint32_t layer, + plMetalFragmentShaderDescription* passDescription, + plMetalFragmentShaderArgumentBuffer* uniforms, + std::vector* piggybacks, + const std::function& preEncodeTransform, + const std::function& postEncodeTransform); bool ICanEatLayer(plLayerInterface *lay); uint32_t ILayersAtOnce(uint32_t which); - void IBuildLayerTexture(MTL::RenderCommandEncoder *encoder, uint32_t offsetFromRootLayer, plLayerInterface *layer); - void EncodeTransform(plLayerInterface *layer, UVOutDescriptor *transform); - std::vector> fPasses; + void IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); + void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); + std::vector> fPasses; std::vector 
fFragmentShaderDescriptions; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 570830dd00..d2dfeada91 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -49,6 +49,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "HeadSpin.h" #include "hsGMatState.inl" #include "hsTimer.h" + #include "pfCamera/plVirtualCamNeu.h" #include "plAvatar/plAvatarClothing.h" #include "plDrawable/plAuxSpan.h" @@ -79,6 +80,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "plSurface/plLayerShadowBase.h" #include "plTweak.h" #include "plgDispatch.h" + #include "pnMessage/plPipeResMakeMsg.h" #include "pnNetCommon/plNetApp.h" // for dbg logging @@ -160,12 +162,12 @@ bool plRenderTriListFunc::RenderPrims() const plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord* devMode) : pl3DPipeline(devMode), fRenderTargetRefList(), fMatRefList(), - fCurrentRenderPassUniforms(nullptr), - currentDrawableCallback(nullptr), - fFragFunction(nullptr), - fVShaderRefList(nullptr), - fPShaderRefList(nullptr), - fULutTextureRef(nullptr), + fCurrentRenderPassUniforms(), + currentDrawableCallback(), + fFragFunction(), + fVShaderRefList(), + fPShaderRefList(), + fULutTextureRef(), fCurrRenderLayer() { fTextureRefList = nullptr; @@ -274,7 +276,8 @@ plTextFont* plMetalPipeline::MakeTextFont(ST::string face, uint16_t size) bool plMetalPipeline::OpenAccess(plAccessSpan& dst, plDrawableSpans* d, const plVertexSpan* span, bool readOnly) { - // FIXME: Whats this? + // FIXME: What's this? + // Hoikas: It's for runtime reading/writing the vertices, mostly used by stuff like dynamic decals. 
return false; } @@ -376,7 +379,8 @@ hsGDeviceRef* plMetalPipeline::MakeRenderTargetRef(plRenderTarget* owner) plCubicRenderTarget* cubicRT; // If we have Shader Model 3 and support non-POT textures, let's make reflections the pipe size - if (plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner)) { + plDynamicCamMap* camMap = plDynamicCamMap::ConvertNoRef(owner); + if (camMap && camMap->IsReflection()) { camMap->ResizeViewport(IGetViewTransform()); } @@ -432,7 +436,7 @@ hsGDeviceRef* plMetalPipeline::MakeRenderTargetRef(plRenderTarget* owner) plRenderTarget* face = cubicRT->GetFace(i); plMetalRenderTargetRef* fRef; - if (face->GetDeviceRef() != nil) { + if (face->GetDeviceRef() != nullptr) { fRef = (plMetalRenderTargetRef*)face->GetDeviceRef(); if (!fRef->IsLinked()) fRef->Link(&fRenderTargetRefList); @@ -956,13 +960,21 @@ void plMetalPipeline::GetSupportedDisplayModes(std::vector* res, /* There are decisions to make here. - Modern macOS does not support "display modes." You panel runs at native resolution at all times, and you can over-render or under-render. But you never set the display mode of the panel, or get the display mode of the panel. Most games have a "scale slider." + Modern macOS does not support "display modes." You panel runs at native resolution at all times, + and you can over-render or under-render. But you never set the display mode of the panel, or get + the display mode of the panel. Most games have a "scale slider." - Note: There are legacy APIs for display modes for compatibility with older software. In since we're here writing a new renderer, lets do things the right way. The display mode APIs also have trouble with density. I.E. a 4k display might be reported as a 2k display if the window manager is running in a higher DPI mode. + Note: There are legacy APIs for display modes for compatibility with older software. In since + we're here writing a new renderer, lets do things the right way. 
The display mode APIs also have + trouble with density. I.E. a 4k display might be reported as a 2k display if the window manager is + running in a higher DPI mode. - The basic approach should be to render at whatever the resolution of our output surface is. We're mostly doing that now (aspect ratio doesn't adjust.) + The basic approach should be to render at whatever the resolution of our output surface is. We're + mostly doing that now (aspect ratio doesn't adjust.) - Ideally we should support some sort of scaling/semi dynamic renderbuffer resolution thing. But don't mess with the window servers framebuffer size. macOS has accelerated resolution scaling like consoles do. Use that. + Ideally we should support some sort of scaling/semi dynamic renderbuffer resolution thing. But don't + mess with the window servers framebuffer size. macOS has accelerated resolution scaling like consoles + do. Use that. */ std::vector supported; @@ -999,7 +1011,7 @@ int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth) void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync) { - // FIXME: Whats this? + // FIXME: What's this? // Seems like an entry point for passing in display settings. fDevice.SetMaxAnsiotropy(MaxAnisotropicSamples); @@ -1310,14 +1322,13 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI IHandleZMode(hsGMatState::kZNoZWrite); // This is a bit weird - in since this isn't a material we need to build a query for the right Metal program ourselves - plMetalFragmentShaderDescription description; - memset(&description, 0, sizeof(description)); - description.numLayers = fCurrNumLayers = 1; + plMetalFragmentShaderDescription description{}; + description.fNumLayers = fCurrNumLayers = 1; description.Populate(proj, 0); // DX sets the color invert when the final color should be inverted. Not sure why! 
if (proj->GetBlendFlags() & hsGMatState::kBlendInvertFinalColor) { - description.blendModes[0] |= hsGMatState::kBlendInvertColor; + description.fBlendModes[0] |= hsGMatState::kBlendInvertColor; } plMetalMaterialPassPipelineState materialShaderState(&fDevice, vRef, description); @@ -1508,7 +1519,9 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, ISetCullMode(); } - // Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders to null after calling this. That won't work here in since our pipeline state has to know the shaders. + // Some build passes don't allow shaders. Render the geometry and the provided material, but don't allow the + // shader path if instructed to. In the DX source, this would be done by the render phase setting the shaders + // to null after calling this. That won't work here in since our pipeline state has to know the shaders. 
if (lay->GetVertexShader() && allowShaders) { lay = IPushOverBaseLayer(lay); lay = IPushOverAllLayer(lay); @@ -1622,32 +1635,39 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, fragmentShaderDescription = mRef->GetFragmentShaderDescription(pass); } else { // Plasma pulls piggybacks from the rear first, pull the number of active piggybacks - auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks; - auto lastPiggyback = fPiggyBackStack.end(); + auto firstPiggyback = fPiggyBackStack.end() - numActivePiggyBacks; + auto lastPiggyback = fPiggyBackStack.end(); + std::vector subPiggybacks(firstPiggyback, lastPiggyback); - mRef->EncodeArguments( - fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, &fragmentShaderDescription, &subPiggybacks, - [&](plLayerInterface* layer, uint32_t index) { + + auto preEncodeTransform = [this](plLayerInterface* layer, uint32_t index) { if (index == 0) { layer = IPushOverBaseLayer(layer); } layer = IPushOverAllLayer(layer); - + return layer; - }, - [&](plLayerInterface* layer, uint32_t index) { + }; + + auto postEncodeTransform = [this](plLayerInterface* layer, uint32_t index) { layer = IPopOverAllLayer(layer); if (index == 0) layer = IPopOverBaseLayer(layer); return layer; - }); + }; + + mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), + fCurrentRenderPassUniforms, + pass, + &fragmentShaderDescription, + &subPiggybacks, + preEncodeTransform, + postEncodeTransform); } plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalMaterialPassPipelineState(&fDevice, vRef, fragmentShaderDescription).GetRenderPipelineState(); const MTL::RenderPipelineState* pipelineState = linkedPipeline->pipelineState; - - /*plMetalDevice::plMetalLinkedPipeline *pipeline = fDevice.pipelineStateFor(vRef, s.fBlendFlags, numActivePiggyBacks + mRef->fPassLengths[pass], plShaderID::Unregistered, plShaderID::Unregistered, sources, blendModes, miscFlags); - const MTL::RenderPipelineState 
*pipelineState = pipeline->pipelineState;*/ + if (fState.fCurrentPipelineState != pipelineState) { fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(pipelineState); fState.fCurrentPipelineState = pipelineState; @@ -2051,62 +2071,21 @@ void plMetalPipeline::ISetLayer(uint32_t lay) void plMetalPipeline::IHandleBlendMode(hsGMatState flags) { - // No color, just writing out Z values. + // This function is a weird leftover of CPU side blend mode setting. + // We need the error case, but nothing else? In Metal this is all + // done GPU side - but the GPU can't write an error state on a CPU + // side buffer. if (flags.fBlendFlags & hsGMatState::kBlendNoColor) { - // printf("glBlendFunc(GL_ZERO, GL_ONE);\n"); flags.fBlendFlags |= 0x80000000; } else { switch (flags.fBlendFlags & hsGMatState::kBlendMask) { - // Detail is just a special case of alpha, handled in construction of the texture - // mip chain by making higher levels of the chain more transparent. case hsGMatState::kBlendDetail: case hsGMatState::kBlendAlpha: - if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalAlpha) { - if (flags.fBlendFlags & hsGMatState::kBlendAlphaPremultiplied) { - // printf("glBlendFunc(GL_ONE, GL_SRC_ALPHA);\n"); - } else { - // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_SRC_ALPHA);\n"); - } - } else { - if (flags.fBlendFlags & hsGMatState::kBlendAlphaPremultiplied) { - // printf("glBlendFunc(GL_ONE, GL_ONE_MINUS_SRC_ALPHA);\n"); - } else { - // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA);\n"); - } - } - break; - - // Multiply the final color onto the frame buffer. case hsGMatState::kBlendMult: - if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalColor) { - // printf("glBlendFunc(GL_ZERO, GL_ONE_MINUS_SRC_COLOR);\n"); - } else { - // printf("glBlendFunc(GL_ZERO, GL_SRC_COLOR);\n"); - } - break; - - // Add final color to FB. 
case hsGMatState::kBlendAdd: - // printf("glBlendFunc(GL_ONE, GL_ONE);\n"); - break; - - // Multiply final color by FB color and add it into the FB. case hsGMatState::kBlendMADD: - // printf("glBlendFunc(GL_DST_COLOR, GL_ONE);\n"); - break; - - // Final color times final alpha, added into the FB. case hsGMatState::kBlendAddColorTimesAlpha: - if (flags.fBlendFlags & hsGMatState::kBlendInvertFinalAlpha) { - // printf("glBlendFunc(GL_ONE_MINUS_SRC_ALPHA, GL_ONE);\n"); - } else { - // printf("glBlendFunc(GL_SRC_ALPHA, GL_ONE);\n"); - } - break; - - // Overwrite final color onto FB case 0: - // printf("glBlendFunc(GL_ONE, GL_ZERO);\n"); break; default: { @@ -2121,7 +2100,8 @@ void plMetalPipeline::IHandleBlendMode(hsGMatState flags) } // layer state needs to be syncronized to the GPU static_cast(fCurrMaterial->GetDeviceRef())->SetDirty(true); - } break; + } + break; } } } @@ -2492,7 +2472,7 @@ void plMetalPipeline::IDrawPlate(plPlate* plate) fDevice.CurrentRenderCommandEncoder()->setDepthStencilState(pm->fDepthState); fDevice.CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); - int uniformSize = sizeof(VertexUniforms); + int uniformSize = sizeof(VertexUniforms); VertexUniforms uniforms; uniforms.projectionMatrix = projMat; matrix_float4x4 modelMatrix; @@ -2991,36 +2971,38 @@ void plMetalPipeline::IReleaseRenderTargetPools() for (i = 0; i < fRenderTargetPool512.size(); i++) { delete fRenderTargetPool512[i]; - fRenderTargetPool512[i] = nil; + fRenderTargetPool512[i] = nullptr; } fRenderTargetPool512.clear(); for (i = 0; i < fRenderTargetPool256.size(); i++) { delete fRenderTargetPool256[i]; - fRenderTargetPool256[i] = nil; + fRenderTargetPool256[i] = nullptr; } fRenderTargetPool256.clear(); for (i = 0; i < fRenderTargetPool128.size(); i++) { delete fRenderTargetPool128[i]; - fRenderTargetPool128[i] = nil; + fRenderTargetPool128[i] = nullptr; } fRenderTargetPool128.clear(); for (i = 0; i < fRenderTargetPool64.size(); i++) { delete fRenderTargetPool64[i]; - 
fRenderTargetPool64[i] = nil; + fRenderTargetPool64[i] = nullptr; } fRenderTargetPool64.clear(); for (i = 0; i < fRenderTargetPool32.size(); i++) { delete fRenderTargetPool32[i]; - fRenderTargetPool32[i] = nil; + fRenderTargetPool32[i] = nullptr; } fRenderTargetPool32.clear(); for (i = 0; i < kMaxRenderTargetNext; i++) { fRenderTargetNext[i] = 0; + // Blur is implemented in Metal through MPS. + // If we need a hand written blur algorithm implement here. // fBlurScratchRTs[i] = nil; // fBlurDestRTs[i] = nil; } @@ -3439,13 +3421,13 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe #endif // HS_DEBUGGING /// Check--is this renderTarget really a child of a cubicRenderTarget? - if (owner->GetParent() != nil) { + if (owner->GetParent() != nullptr) { /// This'll create the deviceRefs for all of its children as well SharedRenderTargetRef(share->GetParent(), owner->GetParent()); return owner->GetDeviceRef(); } - if (owner->GetDeviceRef() != nil) + if (owner->GetDeviceRef() != nullptr) ref = (plMetalRenderTargetRef*)owner->GetDeviceRef(); // Look for a good format of matching color and depth size. @@ -3467,9 +3449,9 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe // Check for Cubic. This is unlikely, since this function is currently only // used for the shadow map pools. 
cubicRT = plCubicRenderTarget::ConvertNoRef(owner); - if (cubicRT != nil) { + if (cubicRT != nullptr) { /// And create the ref (it'll know how to set all the flags) - if (ref != nil) + if (ref != nullptr) ref->SetOwner(owner); else { ref = new plMetalRenderTargetRef(); @@ -3486,7 +3468,7 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe plRenderTarget* face = cubicRT->GetFace(i); plMetalRenderTargetRef* fRef; - if (face->GetDeviceRef() != nil) { + if (face->GetDeviceRef() != nullptr) { fRef = (plMetalRenderTargetRef*)face->GetDeviceRef(); fRef->SetOwner(face); if (!fRef->IsLinked()) @@ -3504,14 +3486,14 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe ref->fTexture = cubeTexture; } else { hsRefCnt_SafeUnRef(ref); - ref = nil; + ref = nullptr; } } // Is it a texture render target? Probably, since shadow maps are all we use this for. else if (owner->GetFlags() & plRenderTarget::kIsTexture || owner->GetFlags() & plRenderTarget::kIsOffscreen) { // DX seperated the onscreen and offscreen types. Metal doesn't care. All render targets are textures. 
/// Create a normal texture - if (ref != nil) + if (ref != nullptr) ref->SetOwner(owner); else { ref = new plMetalRenderTargetRef(); @@ -3526,7 +3508,7 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe ref->fTexture = texture; } else { hsRefCnt_SafeUnRef(ref); - ref = nil; + ref = nullptr; } if (owner->GetZDepth() && (owner->GetFlags() & (plRenderTarget::kIsTexture | plRenderTarget::kIsOffscreen))) { @@ -3550,14 +3532,14 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe owner->SetDeviceRef(ref); // Unref now, since for now ONLY the RT owns the ref, not us (not until we use it, at least) hsRefCnt_SafeUnRef(ref); - if (ref != nil && !ref->IsLinked()) + if (ref != nullptr && !ref->IsLinked()) ref->Link(&fRenderTargetRefList); } else { - if (ref != nil && !ref->IsLinked()) + if (ref != nullptr && !ref->IsLinked()) ref->Link(&fRenderTargetRefList); } - if (ref != nil) { + if (ref != nullptr) { ref->SetDirty(false); } @@ -3592,7 +3574,7 @@ void plMetalPipeline::IMakeRenderTargetPools() }; int i; for (i = 0; i < kMaxRenderTargetNext; i++) { - std::vector* pool = nil; + std::vector* pool = nullptr; switch (i) { default: case 0: @@ -3621,7 +3603,7 @@ void plMetalPipeline::IMakeRenderTargetPools() if (pool) { pool->resize(kCount[i] + 1); (*pool)[0] = nil; - (*pool)[(int)(kCount[i])] = nil; + (*pool)[(int)(kCount[i])] = nullptr; int j; for (j = 0; j < kCount[i]; j++) { uint16_t flags = plRenderTarget::kIsTexture | plRenderTarget::kIsProjected; @@ -3675,8 +3657,8 @@ void plMetalPipeline::IResetRenderTargetPools() int i; for (i = 0; i < kMaxRenderTargetNext; i++) { fRenderTargetNext[i] = 0; - // fBlurScratchRTs[i] = nil; - // fBlurDestRTs[i] = nil; + // fBlurScratchRTs[i] = nullptr; + // fBlurDestRTs[i] = nullptr; } // fLights.fNextShadowLight = 0; @@ -3695,7 +3677,7 @@ void plMetalPipeline::IRenderShadowCasterSpan(plShadowSlave* slave, plDrawableSp plMetalVertexBufferRef* vRef = 
(plMetalVertexBufferRef*)drawable->GetVertexRef(span.fGroupIdx, span.fVBufferIdx); plMetalIndexBufferRef* iRef = (plMetalIndexBufferRef*)drawable->GetIndexRef(span.fGroupIdx, span.fIBufferIdx); - if (vRef->GetBuffer() == nil || iRef->GetBuffer() == nil) { + if (vRef->GetBuffer() == nullptr || iRef->GetBuffer() == nullptr) { hsAssert(false, "Trying to render a nil buffer pair!"); return; } @@ -3767,10 +3749,9 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con plShadowState shadowState; ISetupShadowState(fShadows[i], shadowState); - struct plMetalFragmentShaderDescription passDescription; - memset(&passDescription, 0, sizeof(passDescription)); + struct plMetalFragmentShaderDescription passDescription{}; - passDescription.numLayers = fCurrNumLayers = 3; + passDescription.fNumLayers = fCurrNumLayers = 3; /* Things get a wee bit complicated here. @@ -3795,7 +3776,7 @@ void plMetalPipeline::IRenderShadowsOntoSpan(const plRenderPrimFunc& render, con } // There's no texture for the third stage if we're reusing the textures // for the first and second stages from the last render. 
- passDescription.passTypes[2] = PassTypeColor; + passDescription.fPassTypes[2] = PassTypeColor; plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalRenderShadowPipelineState(&fDevice, vRef, passDescription).GetRenderPipelineState(); if (fState.fCurrentPipelineState != linkedPipeline->pipelineState) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 2f662b7a8c..640599a957 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -79,9 +79,9 @@ void plMetalRenderSpanPipelineState::GetFunctionConstants(MTL::FunctionConstantV size_t plMetalRenderSpanPipelineState::GetHash() const { - std::size_t h1 = std::hash()(fNumUVs); - std::size_t h2 = std::hash()(fNumWeights); - std::size_t h3 = std::hash()(fHasSkinIndices); + size_t h1 = std::hash()(fNumUVs); + size_t h2 = std::hash()(fNumWeights); + size_t h3 = std::hash()(fHasSkinIndices); return h1 ^ h2 ^ h3 ^ plMetalPipelineState::GetHash(); } @@ -106,15 +106,15 @@ plMetalMaterialPassPipelineState::plMetalMaterialPassPipelineState(plMetalDevice void plMetalMaterialPassPipelineState::GetFunctionConstants(MTL::FunctionConstantValues* constants) const { plMetalRenderSpanPipelineState::GetFunctionConstants(constants); - constants->setConstantValue(&fFragmentShaderDescription.numLayers, MTL::DataTypeUChar, FunctionConstantNumLayers); - constants->setConstantValues(&fFragmentShaderDescription.passTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); - constants->setConstantValues(&fFragmentShaderDescription.blendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); - constants->setConstantValues(&fFragmentShaderDescription.miscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); + constants->setConstantValue(&fFragmentShaderDescription.fNumLayers, MTL::DataTypeUChar, 
FunctionConstantNumLayers); + constants->setConstantValues(&fFragmentShaderDescription.fPassTypes, MTL::DataTypeUChar, NS::Range(FunctionConstantSources, 8)); + constants->setConstantValues(&fFragmentShaderDescription.fBlendModes, MTL::DataTypeUInt, NS::Range(FunctionConstantBlendModes, 8)); + constants->setConstantValues(&fFragmentShaderDescription.fMiscFlags, MTL::DataTypeUInt, NS::Range(FunctionConstantLayerFlags, 8)); } size_t plMetalMaterialPassPipelineState::GetHash() const { - std::size_t value = plMetalRenderSpanPipelineState::GetHash(); + size_t value = plMetalRenderSpanPipelineState::GetHash(); value ^= fFragmentShaderDescription.GetHash(); return value; @@ -124,7 +124,7 @@ void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescri { int vertOffset = 0; int skinWeightOffset = vertOffset + (sizeof(float) * 3); - if (this->fHasSkinIndices) { + if (fHasSkinIndices) { skinWeightOffset += sizeof(uint32_t); } int normOffset = skinWeightOffset + (sizeof(float) * this->fNumWeights); @@ -140,7 +140,7 @@ void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescri vertexDescriptor->attributes()->object(VertexAttributeNormal)->setBufferIndex(0); vertexDescriptor->attributes()->object(VertexAttributeNormal)->setOffset(normOffset); - if (this->fNumWeights > 0) { + if (fNumWeights > 0) { int weightOneOffset = skinWeightOffset; vertexDescriptor->attributes()->object(VertexAttributeWeights)->setFormat(MTL::VertexFormatFloat); @@ -298,14 +298,14 @@ const NS::String* plMetalMaterialPassPipelineState::GetDescription() void plMetalMaterialPassPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) { - uint32_t blendMode = fFragmentShaderDescription.blendModes[0]; + uint32_t blendMode = fFragmentShaderDescription.fBlendModes[0]; ConfigureBlendMode(blendMode, descriptor); } void plMetalFragmentShaderDescription::Populate(const plLayerInterface* layPtr, const uint8_t index) { - blendModes[index] = 
layPtr->GetBlendFlags(); - miscFlags[index] = layPtr->GetMiscFlags(); + fBlendModes[index] = layPtr->GetBlendFlags(); + fMiscFlags[index] = layPtr->GetMiscFlags(); PopulateTextureInfo(layPtr, index); } @@ -314,15 +314,15 @@ void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterfac plBitmap* texture = layPtr->GetTexture(); if (texture != nullptr) { if (plCubicEnvironmap::ConvertNoRef(texture) != nullptr || plCubicRenderTarget::ConvertNoRef(texture) != nullptr) { - passTypes[index] = PassTypeCubicTexture; + fPassTypes[index] = PassTypeCubicTexture; } else if (plMipmap::ConvertNoRef(texture) != nullptr || plRenderTarget::ConvertNoRef(texture) != nullptr) { - passTypes[index] = PassTypeTexture; + fPassTypes[index] = PassTypeTexture; } else { - passTypes[index] = PassTypeColor; + fPassTypes[index] = PassTypeColor; } } else { - passTypes[index] = PassTypeColor; + fPassTypes[index] = PassTypeColor; } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 00f83516a9..61882871b6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -50,6 +50,17 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plMetalDevice.h" #include "plSurface/plShaderTable.h" +enum plMetalPipelineType +{ + // Unknown is for abstract types, don't use it + Unknown = 0, + MaterialShader, + ShadowCaster, + ShadowRender, + Clear, + Dynamic +}; + //MARK: Base pipeline state class plMetalPipelineState @@ -68,7 +79,7 @@ class plMetalPipelineState } virtual size_t GetHash() const; virtual bool IsEqual(const plMetalPipelineState& p) const = 0; - virtual uint16_t GetID() const { return 0; }; + virtual uint16_t GetID() const { return plMetalPipelineType::Unknown; }; virtual plMetalPipelineState* Clone() = 0; // @@ -129,16 +140,16 @@ class plMetalRenderSpanPipelineState : public plMetalPipelineState struct plMetalFragmentShaderDescription { - uint8_t passTypes[8]; - uint32_t blendModes[8]; - uint32_t miscFlags[8]; - uint8_t numLayers; + uint8_t fPassTypes[8]; + uint32_t fBlendModes[8]; + uint32_t fMiscFlags[8]; + uint8_t fNumLayers; size_t hash; bool operator==(const plMetalFragmentShaderDescription& p) const { - bool match = numLayers == p.numLayers && memcmp(passTypes, p.passTypes, sizeof(passTypes)) == 0 && memcmp(blendModes, p.blendModes, sizeof(blendModes)) == 0 && memcmp(miscFlags, p.miscFlags, sizeof(miscFlags)) == 0; + bool match = fNumLayers == p.fNumLayers && memcmp(fPassTypes, p.fPassTypes, sizeof(fPassTypes)) == 0 && memcmp(fBlendModes, p.fBlendModes, sizeof(fBlendModes)) == 0 && memcmp(fMiscFlags, p.fMiscFlags, sizeof(fMiscFlags)) == 0; return match; } @@ -153,19 +164,19 @@ struct plMetalFragmentShaderDescription if (hash) return hash; - std::size_t value = std::hash()(numLayers); - value ^= std::hash()(numLayers); + std::size_t value = std::hash()(fNumLayers); + value ^= std::hash()(fNumLayers); for (int i = 0; i < 8; i++) { - value ^= std::hash()(blendModes[i]); + value ^= std::hash()(fBlendModes[i]); } for (int i = 0; i < 8; i++) { - value ^= std::hash()(miscFlags[i]); + value ^= std::hash()(fMiscFlags[i]); } for (int i = 0; i < 8; i++) { - value 
^= std::hash()(passTypes[i]); + value ^= std::hash()(fPassTypes[i]); } return value; @@ -178,7 +189,7 @@ struct plMetalFragmentShaderDescription template <> struct std::hash { - std::size_t operator()(plMetalFragmentShaderDescription const& s) const noexcept + size_t operator()(plMetalFragmentShaderDescription const& s) const noexcept { return s.GetHash(); } @@ -198,7 +209,7 @@ class plMetalMaterialPassPipelineState : public plMetalRenderSpanPipelineState bool IsEqual(const plMetalPipelineState& p) const override; - uint16_t GetID() const override { return 1; }; + uint16_t GetID() const override { return plMetalPipelineType::MaterialShader; }; plMetalPipelineState* Clone() override { @@ -233,7 +244,7 @@ class plMetalRenderShadowCasterPipelineState : public plMetalRenderSpanPipelineS descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); }; - uint16_t GetID() const override { return 2; }; + uint16_t GetID() const override { return plMetalPipelineType::ShadowCaster; }; plMetalPipelineState* Clone() override { @@ -258,7 +269,7 @@ class plMetalRenderShadowPipelineState : public plMetalMaterialPassPipelineState MTL::Function* GetVertexFunction(MTL::Library* library) override; MTL::Function* GetFragmentFunction(MTL::Library* library) override; void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; - uint16_t GetID() const override { return 3; }; + uint16_t GetID() const override { return plMetalPipelineType::ShadowRender; }; plMetalPipelineState* Clone() override { @@ -275,9 +286,11 @@ class plMetalDynamicMaterialPipelineState : public plMetalRenderSpanPipelineStat : plMetalRenderSpanPipelineState(device, vRef), fVertexShaderID(vertexShaderID), fFragmentShaderID(fragmentShaderID), - fBlendMode(blendMode){ - - }; + fBlendMode(blendMode) + { + }; + + uint16_t GetID() const override { return plMetalPipelineType::Dynamic; }; plMetalPipelineState* Clone() override { @@ 
-351,7 +364,7 @@ class plMetalClearPipelineState : public plMetalPipelineState return clearState->fShouldClearDepth == fShouldClearDepth && fShouldClearColor == clearState->fShouldClearColor; }; - uint16_t GetID() const override { return 4; }; + uint16_t GetID() const override { return plMetalPipelineType::Clear; }; plMetalPipelineState* Clone() override { return new plMetalClearPipelineState(*this); @@ -398,7 +411,7 @@ class plMetalClearPipelineState : public plMetalPipelineState size_t GetHash() const override { - std::size_t value = plMetalPipelineState::GetHash(); + size_t value = plMetalPipelineState::GetHash(); value ^= std::hash()(fShouldClearColor); value ^= std::hash()(fShouldClearDepth); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 23805c8288..4e927dc8b5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -47,7 +47,7 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "ShaderTypes.h" #include "plMetalPipeline.h" -plMetalPlateManager::plMetalPlateManager(plMetalPipeline *pipe) +plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) : plPlateManager(pipe), fVtxBuffer(nullptr) { @@ -62,7 +62,7 @@ void plMetalPlateManager::ICreateGeometry() { plMetalPipeline *pipeline = (plMetalPipeline *)fOwner; if (!fVtxBuffer) { - struct plateVertexBuffer vertexBuffer; + plateVertexBuffer vertexBuffer; vertexBuffer.vertices[0].Set(-0.5f, -0.5f); vertexBuffer.uv[0].Set(0.0f, 0.0f); @@ -83,7 +83,7 @@ void plMetalPlateManager::ICreateGeometry() } } -void plMetalPlateManager::EncodeDraw(MTL::RenderCommandEncoder *encoder) +void plMetalPlateManager::EncodeDraw(MTL::RenderCommandEncoder* encoder) { encoder->setVertexBuffer(fVtxBuffer, 0, VertexAttributePosition); encoder->setVertexBuffer(fVtxBuffer, offsetof(plateVertexBuffer, uv), VertexAttributeTexcoord); @@ -99,7 +99,7 @@ void plMetalPlateManager::IReleaseGeometry() } } -void plMetalPlateManager::IDrawToDevice(plPipeline *pipe) +void plMetalPlateManager::IDrawToDevice(plPipeline* pipe) { plMetalPipeline *pipeline = (plMetalPipeline *)pipe; plPlate *plate = nullptr; @@ -116,39 +116,39 @@ plMetalPlateManager::~plMetalPlateManager() IReleaseGeometry(); } -bool plMetalPlatePipelineState::IsEqual(const plMetalPipelineState &p) const +bool plMetalPlatePipelineState::IsEqual(const plMetalPipelineState& p) const { return true; } -plMetalPipelineState *plMetalPlatePipelineState::Clone() +plMetalPipelineState* plMetalPlatePipelineState::Clone() { return new plMetalPlatePipelineState(fDevice); } -const MTL::Function *plMetalPlatePipelineState::GetVertexFunction(MTL::Library *library) +const MTL::Function* plMetalPlatePipelineState::GetVertexFunction(MTL::Library* library) { return library->newFunction(MTLSTR("plateVertexShader")); } -const MTL::Function *plMetalPlatePipelineState::GetFragmentFunction(MTL::Library *library) +const MTL::Function* 
plMetalPlatePipelineState::GetFragmentFunction(MTL::Library* library) { return library->newFunction(MTLSTR("fragmentShader")); } -const NS::String *plMetalPlatePipelineState::GetDescription() +const NS::String* plMetalPlatePipelineState::GetDescription() { return MTLSTR("Plate Pipeline State"); } -void plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +void plMetalPlatePipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) { descriptor->setBlendingEnabled(true); descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); } -void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) +void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { vertexDescriptor->attributes()->object(0)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(0)->setBufferIndex(VertexAttributePosition); @@ -161,6 +161,6 @@ void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 2); } -void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const +void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues*) const { } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index 60e8cfbc5b..3b824766f9 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -61,10 +61,10 @@ class plMetalPlatePipelineState : public plMetalPipelineState plMetalPlatePipelineState(plMetalDevice *device) : plMetalPipelineState(device){}; bool IsEqual(const plMetalPipelineState &p) const override; uint16_t GetID() const override { 
return 5; }; - plMetalPipelineState *Clone() override; - const MTL::Function *GetVertexFunction(MTL::Library *library) override; - const MTL::Function *GetFragmentFunction(MTL::Library *library) override; - const NS::String *GetDescription() override; + plMetalPipelineState* Clone() override; + const MTL::Function * GetVertexFunction(MTL::Library *library) override; + const MTL::Function * GetFragmentFunction(MTL::Library *library) override; + const NS::String* GetDescription() override; void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; @@ -78,11 +78,11 @@ class plMetalPlateManager : public plPlateManager friend class plMetalPipeline; public: - plMetalPlateManager(plMetalPipeline *pipe); - void IDrawToDevice(plPipeline *pipe) override; + plMetalPlateManager(plMetalPipeline* pipe); + void IDrawToDevice(plPipeline* pipe) override; void ICreateGeometry(); void IReleaseGeometry(); - void EncodeDraw(MTL::RenderCommandEncoder *encoder); + void EncodeDraw(MTL::RenderCommandEncoder* encoder); ~plMetalPlateManager(); private: @@ -91,9 +91,9 @@ class plMetalPlateManager : public plPlateManager hsPoint2 vertices[4]; hsPoint2 uv[4]; }; - MTL::Buffer *fVtxBuffer; - MTL::Buffer *idxBuffer; - MTL::DepthStencilState *fDepthState; + MTL::Buffer* fVtxBuffer; + MTL::Buffer* idxBuffer; + MTL::DepthStencilState* fDepthState; }; #endif /* plMetalPlateManager_hpp */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp index 3e0b225e01..a82c6f03e3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.cpp @@ -47,16 +47,14 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com plMetalShader::plMetalShader(plShader* owner) : fOwner(owner), - fPipe(nil) + fPipe() { owner->SetDeviceRef(this); } plMetalShader::~plMetalShader() { - fPipe = nil; - - // ISetError(nil); + fPipe = nullptr; } void plMetalShader::SetOwner(plShader* owner) @@ -67,14 +65,3 @@ void plMetalShader::SetOwner(plShader* owner) owner->SetDeviceRef(this); } } - -/*HRESULT plMetalShader::IOnError(HRESULT hr, const char* errStr) -{ - ISetError(errStr); - - fOwner->Invalidate(); - - hsStatusMessage(errStr); - - return hr; -}*/ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h index 8c5c6a1952..157ae6ce28 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h @@ -55,23 +55,17 @@ class plMetalShader : public plMetalDeviceRef { protected: plShader* fOwner; - // ST::string fErrorString; plMetalPipeline* fPipe; MTL::Function* fFunction; - - // HRESULT IOnError(HRESULT hr, const char* errStr); - // void ISetError(const char* errStr) { fErrorString = errStr; } - - // virtual HRESULT ICreate(plDXPipeline* pipe) = 0; + virtual bool ISetConstants(plMetalPipeline* pipe) = 0; // On error, sets error string. 
public: plMetalShader(plShader* owner); virtual ~plMetalShader(); - // ST::string GetErrorString() const { return fErrorString; } void SetOwner(plShader* owner); - MTL::Function* GetShader(plMetalPipeline* pipe) { return fFunction; }; + MTL::Function* GetShader(plMetalPipeline* pipe) const { return fFunction; }; }; #endif // plDXShader_inc From 8dab1e37259f9a3b770c52c680e951c5acb85c8f Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 4 Nov 2023 14:26:32 -0700 Subject: [PATCH 149/165] Another set of changes --- .../ShaderSrc/FixedPipelineShaders.metal | 2 +- .../pfMetalPipeline/plMetalPipelineState.h | 3 +- .../pfMetalPipeline/plMetalPlateManager.h | 2 +- .../pfMetalPipeline/plMetalTextFont.cpp | 217 +++--------------- .../pfMetalPipeline/plMetalTextFont.h | 2 +- .../pfMetalPipeline/plMetalVertexShader.cpp | 10 +- .../pfMetalPipeline/plMetalVertexShader.h | 4 +- .../PubUtilLib/plSurface/hsGMaterial.cpp | 3 - .../Plasma/PubUtilLib/plSurface/hsGMaterial.h | 4 - 9 files changed, 39 insertions(+), 208 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 2fb497d31e..77581983c7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -200,7 +200,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], if (lightSource->scale == 0.0h) continue; - // w is attenation + // direction.w is attenuation float4 direction; if (lightSource->position.w == 0.0) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 61882871b6..021baa0bcc 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -58,7 +58,8 @@ enum 
plMetalPipelineType ShadowCaster, ShadowRender, Clear, - Dynamic + Dynamic, + Text }; //MARK: Base pipeline state diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index 3b824766f9..27d579302a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -70,7 +70,7 @@ class plMetalPlatePipelineState : public plMetalPipelineState void ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) override; - void GetFunctionConstants(MTL::FunctionConstantValues *) const override; + void GetFunctionConstants(MTL::FunctionConstantValues*) const override; }; class plMetalPlateManager : public plPlateManager diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp index ac7a398fe6..3f8a412ca4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.cpp @@ -39,16 +39,6 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com Mead, WA 99021 *==LICENSE==*/ -/////////////////////////////////////////////////////////////////////////////// -// // -// plDXTextFont Class Functions // -// Cyan, Inc. // -// // -//// Version History ////////////////////////////////////////////////////////// -// // -// 2.19.2001 mcn - Created. // -// // -/////////////////////////////////////////////////////////////////////////////// #include "plMetalTextFont.h" @@ -61,14 +51,13 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com // * 4 primitives per char max (for bold text) // * 3 verts per primitive -// const uint32_t kNumVertsInBuffer(32768); -const uint32_t kNumVertsInBuffer(4608); +constexpr uint32_t kNumVertsInBuffer = 4608; uint32_t plMetalTextFont::fBufferCursor = 0; //// Constructor & Destructor ///////////////////////////////////////////////// -plMetalTextFont::plMetalTextFont(plPipeline *pipe, plMetalDevice *device) : plTextFont(pipe), +plMetalTextFont::plMetalTextFont(plPipeline *pipe, plMetalDevice* device) : plTextFont(pipe), fTexture() { fDevice = device; @@ -85,8 +74,6 @@ plMetalTextFont::~plMetalTextFont() void plMetalTextFont::ICreateTexture(uint16_t *data) { - printf("Create texture\n"); - MTL::TextureDescriptor *descriptor = MTL::TextureDescriptor::texture2DDescriptor(MTL::PixelFormatRGBA8Unorm, fTextureWidth, fTextureHeight, false); fTexture->release(); @@ -109,10 +96,10 @@ void plMetalTextFont::ICreateTexture(uint16_t *data) uint8_t a; }; - uint32_t *outData = new uint32_t[fTextureWidth * fTextureHeight]; - for (int i = 0; i < fTextureWidth * fTextureHeight; i++) { - InDataValues *in = (InDataValues *)(data + i); - OutDataValues *out = (OutDataValues *)(outData + i); + auto outData = std::make_unique(fTextureWidth * fTextureHeight); + for (size_t i = 0; i < fTextureWidth * fTextureHeight; i++) { + InDataValues* in = (InDataValues*)(data + i); + OutDataValues* out = (OutDataValues*)(outData.get() + i); out->r = in->r * 255; out->b = in->b * 255; @@ -120,34 +107,14 @@ void plMetalTextFont::ICreateTexture(uint16_t *data) out->a = in->a * 255; } - fTexture->replaceRegion(MTL::Region(0, 0, fTextureWidth, fTextureHeight), 0, outData, 4 * fTextureWidth); - delete[] outData; - /* - HRESULT hr; - D3DLOCKED_RECT lockInfo; - D3DCAPS9 d3dCaps; - - - // Check to make sure we can support it - fDevice->GetDeviceCaps( &d3dCaps ); - hsAssert( fTextureWidth <= d3dCaps.MaxTextureWidth, "Cannot initialize DX font--texture size too big" ); - - // Create our texture 
object - hr = fDevice->CreateTexture(fTextureWidth, fTextureHeight, 1, 0, D3DFMT_A4R4G4B4, D3DPOOL_MANAGED, &fD3DTexture, nullptr); - hsAssert( !FAILED( hr ), "Cannot create D3D texture" ); - - // Lock the texture and write our values out - fD3DTexture->LockRect(0, &lockInfo, nullptr, 0); - memcpy( lockInfo.pBits, data, fTextureWidth * fTextureHeight * sizeof( uint16_t ) ); - fD3DTexture->UnlockRect( 0 ); - */ + fTexture->replaceRegion(MTL::Region(0, 0, fTextureWidth, fTextureHeight), 0, outData.get(), 4 * fTextureWidth); } -void plMetalTextFont::CreateShared(plMetalDevice *device) +void plMetalTextFont::CreateShared(plMetalDevice* device) { } -void plMetalTextFont::ReleaseShared(MTL::Device *device) +void plMetalTextFont::ReleaseShared(MTL::Device* device) { } @@ -155,51 +122,6 @@ void plMetalTextFont::ReleaseShared(MTL::Device *device) void plMetalTextFont::IInitStateBlocks() { - /* - for( int i = 0; i < 2; i++ ) - { - fDevice->BeginStateBlock(); - fDevice->SetRenderState( D3DRS_ALPHABLENDENABLE, TRUE ); - fDevice->SetRenderState( D3DRS_SRCBLEND, D3DBLEND_SRCALPHA ); - fDevice->SetRenderState( D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA ); - fDevice->SetRenderState( D3DRS_ALPHATESTENABLE, TRUE ); - fDevice->SetRenderState( D3DRS_ALPHAREF, 0x08 ); - fDevice->SetRenderState( D3DRS_ALPHAFUNC, D3DCMP_GREATEREQUAL ); - fDevice->SetRenderState( D3DRS_FILLMODE, D3DFILL_SOLID ); - fDevice->SetRenderState( D3DRS_CULLMODE, D3DCULL_CCW ); - - fDevice->SetRenderState( D3DRS_ZENABLE, TRUE ); - fDevice->SetRenderState( D3DRS_ZFUNC, D3DCMP_ALWAYS ); - fDevice->SetRenderState( D3DRS_ZWRITEENABLE, TRUE ); - fDevice->SetRenderState( D3DRS_DEPTHBIAS, 0 ); - - fDevice->SetRenderState( D3DRS_STENCILENABLE, FALSE ); - fDevice->SetRenderState( D3DRS_CLIPPING, TRUE ); - fDevice->SetRenderState( D3DRS_ANTIALIASEDLINEENABLE, FALSE ); - fDevice->SetRenderState( D3DRS_VERTEXBLEND, FALSE ); - fDevice->SetRenderState( D3DRS_INDEXEDVERTEXBLENDENABLE, FALSE ); - fDevice->SetRenderState( 
D3DRS_FOGENABLE, FALSE ); - fDevice->SetTextureStageState( 0, D3DTSS_COLOROP, D3DTOP_MODULATE ); - fDevice->SetTextureStageState( 0, D3DTSS_COLORARG1, D3DTA_TEXTURE ); - fDevice->SetTextureStageState( 0, D3DTSS_COLORARG2, D3DTA_DIFFUSE ); - fDevice->SetTextureStageState( 0, D3DTSS_ALPHAOP, D3DTOP_MODULATE ); - fDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG1, D3DTA_TEXTURE ); - fDevice->SetTextureStageState( 0, D3DTSS_ALPHAARG2, D3DTA_DIFFUSE ); - fDevice->SetSamplerState( 0, D3DSAMP_MINFILTER, D3DTEXF_POINT ); - fDevice->SetSamplerState( 0, D3DSAMP_MAGFILTER, D3DTEXF_POINT ); - fDevice->SetSamplerState( 0, D3DSAMP_MIPFILTER, D3DTEXF_NONE ); - fDevice->SetTextureStageState( 0, D3DTSS_TEXCOORDINDEX, 0 ); - fDevice->SetTextureStageState( 0, D3DTSS_TEXTURETRANSFORMFLAGS, D3DTTFF_COUNT2 ); - fDevice->SetTextureStageState( 1, D3DTSS_COLOROP, D3DTOP_DISABLE ); - fDevice->SetTextureStageState( 1, D3DTSS_ALPHAOP, D3DTOP_DISABLE ); - fDevice->SetRenderState( D3DRS_LIGHTING, FALSE ); - - if( i == 0 ) - fDevice->EndStateBlock( &fOldStateBlock ); - else - fDevice->EndStateBlock( &fTextStateBlock ); - } - */ } //// DestroyObjects /////////////////////////////////////////////////////////// @@ -211,69 +133,32 @@ void plMetalTextFont::DestroyObjects() //// IDrawPrimitive /////////////////////////////////////////////////////////// -void plMetalTextFont::IDrawPrimitive(uint32_t count, plFontVertex *array) +void plMetalTextFont::IDrawPrimitive(uint32_t count, plFontVertex* array) { - plFontVertex *v; + plFontVertex* v; - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); - const uint maxCount = 4096 / (sizeof(plFontVertex) * 3); - uint drawm = 0; + constexpr size_t maxCount = 4096 / 
(sizeof(plFontVertex) * 3); + + uint drawn = 0; while (count > 0) { uint drawCount = MIN(maxCount, count); - fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawm * 3), drawCount * 3 * sizeof(plFontVertex), 0); + fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array + (drawn * 3), drawCount * 3 * sizeof(plFontVertex), 0); fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangle, NS::UInteger(0), drawCount * 3); count -= drawCount; - drawm += drawCount; - } - - // if( !fBuffer ) - // return; - - /// Lock the buffer and write to it - /*if( fBufferCursor && (fBufferCursor + count * 3 < kNumVertsInBuffer) ) - { - // We can lock part of it - if( FAILED( fBuffer->Lock( fBufferCursor * sizeof( plFontVertex ), - count * 3 * sizeof( plFontVertex ), - (void **)&v, D3DLOCK_NOOVERWRITE ) ) ) - { - hsAssert( false, "Failed to lock vertex buffer for writing" ); - return; - } - - fBufferCursor += count * 3; + drawn += drawCount; } - else - { - // Gotta start over - FlushDraws(); - fBufferCursor = count * 3; - - if( FAILED( fBuffer->Lock( 0, count * 3 * sizeof( plFontVertex ), - (void **)&v, D3DLOCK_DISCARD ) ) ) - { - hsAssert( false, "Failed to lock vertex buffer for writing" ); - return; - } - } - - if (v != nullptr && array != nullptr) - { - memcpy( v, array, count * sizeof( plFontVertex ) * 3 ); - } - - fBuffer->Unlock();*/ } //// IDrawLines /////////////////////////////////////////////////////////////// -void plMetalTextFont::IDrawLines(uint32_t count, plFontVertex *array) +void plMetalTextFont::IDrawLines(uint32_t count, plFontVertex* array) { - plMetalDevice::plMetalLinkedPipeline *linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); + plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalTextFontPipelineState(fDevice).GetRenderPipelineState(); fPipeline->fDevice.CurrentRenderCommandEncoder()->setRenderPipelineState(linkedPipeline->pipelineState); 
fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(array, count * 2 * sizeof(plFontVertex), 0); @@ -287,16 +172,6 @@ void plMetalTextFont::IDrawLines(uint32_t count, plFontVertex *array) fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0); fPipeline->fDevice.CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeLine, NS::UInteger(0), count * 2); - /*if( !fBuffer ) - return; - - if (count == 0 || array == nullptr) - return; - - fDevice->SetVertexShader(nullptr); - fDevice->SetFVF(kFVF); - fDevice->SetStreamSource(0, fBuffer, 0, sizeof(plFontVertex)); - fDevice->DrawPrimitiveUP( D3DPT_LINELIST, count, (const void *)array, sizeof( plFontVertex ) );*/ } //// FlushDraws /////////////////////////////////////////////////////////////// @@ -304,17 +179,7 @@ void plMetalTextFont::IDrawLines(uint32_t count, plFontVertex *array) void plMetalTextFont::FlushDraws() { - /*if( !fBuffer ) - return; - - if( fBufferCursor > 0 ) - { - fDevice->SetVertexShader(nullptr); - fDevice->SetFVF(kFVF); - fDevice->SetStreamSource( 0, fBuffer, 0, sizeof( plFontVertex ) ); - fDevice->DrawPrimitive( D3DPT_TRIANGLELIST, 0, fBufferCursor / 3 ); - fBufferCursor = 0; - }*/ + // Metal don't flush } //// SaveStates /////////////////////////////////////////////////////////////// @@ -328,77 +193,51 @@ void plMetalTextFont::SaveStates() mat.columns[3][1] = 1.0; fPipeline->fDevice.CurrentRenderCommandEncoder()->setVertexBytes(&mat, sizeof(matrix_float4x4), 1); fPipeline->fDevice.CurrentRenderCommandEncoder()->setFragmentTexture(fTexture, 0); - /*if( !fInitialized ) - IInitObjects(); - - if (fOldStateBlock) - fOldStateBlock->Capture(); - if (fTextStateBlock) - fTextStateBlock->Apply(); - - fDevice->SetTexture( 0, fD3DTexture ); - fDevice->SetTransform( D3DTS_TEXTURE0, &d3dIdentityMatrix ); - - /// Set up the transform matrices so that the vertices can range (0-screenWidth,0-screenHeight) - fDevice->SetTransform( D3DTS_WORLD, &d3dIdentityMatrix ); - 
fDevice->SetTransform( D3DTS_VIEW, &d3dIdentityMatrix ); - D3DMATRIX mat; - mat = d3dIdentityMatrix; - mat.m[0][0] = 2.0f / (float)fPipe->Width(); - mat.m[1][1] = -2.0f / (float)fPipe->Height(); - mat.m[3][0] = -1.0; - mat.m[3][1] = 1.0; - fDevice->SetTransform( D3DTS_PROJECTION, &mat );*/ } //// RestoreStates //////////////////////////////////////////////////////////// void plMetalTextFont::RestoreStates() { - /*if (fOldStateBlock) - fOldStateBlock->Apply(); - - fDevice->SetTexture(0, nullptr); - fDevice->SetTransform( D3DTS_TEXTURE0, &d3dIdentityMatrix );*/ } -bool plMetalTextFontPipelineState::IsEqual(const plMetalPipelineState &p) const +bool plMetalTextFontPipelineState::IsEqual(const plMetalPipelineState& p) const { return true; } -plMetalPipelineState *plMetalTextFontPipelineState::Clone() +plMetalPipelineState* plMetalTextFontPipelineState::Clone() { return new plMetalTextFontPipelineState(fDevice); } -const MTL::Function *plMetalTextFontPipelineState::GetVertexFunction(MTL::Library *library) +const MTL::Function* plMetalTextFontPipelineState::GetVertexFunction(MTL::Library* library) { return library->newFunction(MTLSTR("textFontVertexShader")); } -const MTL::Function *plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library *library) +const MTL::Function* plMetalTextFontPipelineState::GetFragmentFunction(MTL::Library* library) { return library->newFunction(MTLSTR("textFontFragmentShader")); } -const NS::String *plMetalTextFontPipelineState::GetDescription() +const NS::String* plMetalTextFontPipelineState::GetDescription() { return MTLSTR("Font Rendering"); } -void plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) +void plMetalTextFontPipelineState::ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) { descriptor->setSourceRGBBlendFactor(MTL::BlendFactorSourceAlpha); descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); } -void 
plMetalTextFontPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) +void plMetalTextFontPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { return; } -void plMetalTextFontPipelineState::GetFunctionConstants(MTL::FunctionConstantValues *) const +void plMetalTextFontPipelineState::GetFunctionConstants(MTL::FunctionConstantValues*) const { return; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h index f5eed85271..061dbfb5ee 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -58,7 +58,7 @@ class plMetalTextFontPipelineState : public plMetalPipelineState public: plMetalTextFontPipelineState(plMetalDevice* device) : plMetalPipelineState(device){}; bool IsEqual(const plMetalPipelineState& p) const override; - uint16_t GetID() const override { return 6; }; + uint16_t GetID() const override { return plMetalPipelineType::Text; }; plMetalPipelineState* Clone() override; const MTL::Function* GetVertexFunction(MTL::Library* library) override; const MTL::Function* GetFragmentFunction(MTL::Library* library) override; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp index ebe2aa0a8d..7aeedf0f3b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp @@ -41,14 +41,14 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com *==LICENSE==*/ #include "plMetalVertexShader.h" -#include - #include "HeadSpin.h" -#include "hsWindows.h" + #include "plDrawable/plGBufferGroup.h" #include "plMetalPipeline.h" #include "plSurface/plShader.h" +#include + plMetalVertexShader::plMetalVertexShader(plShader* owner) : plMetalShader(owner) { @@ -61,9 +61,7 @@ plMetalVertexShader::~plMetalVertexShader() void plMetalVertexShader::Release() { - fPipe = nil; - - // ISetError(nil); + fPipe = nullptr; } bool plMetalVertexShader::ISetConstants(plMetalPipeline* pipe) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h index 0364dd789c..0f040ac832 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h @@ -54,10 +54,10 @@ class plMetalVertexShader : public plMetalShader public: bool ISetConstants(plMetalPipeline* pipe) override; // On error, sets error string. 
plMetalVertexShader(plShader* owner); - virtual ~plMetalVertexShader(); + ~plMetalVertexShader() override; void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalVertexShader* GetNext() { return (plMetalVertexShader*)fNext; } + plMetalVertexShader* GetNext() const { return (plMetalVertexShader*)fNext; } void Release() override; }; diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp index e97d5752e3..a2bafdad28 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp @@ -207,13 +207,10 @@ void hsGMaterial::SetLayer(plLayerInterface* layer, int32_t which, bool insert, } } -#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL void hsGMaterial::SetDeviceRef(hsGDeviceRef* ref) { hsRefCnt_SafeAssign(fDeviceRef, ref); } -#endif - void hsGMaterial::Write(hsStream* s) { diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h index a7bad93b40..72ee7b81c7 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h @@ -93,9 +93,7 @@ class hsGMaterial : public plSynchedObject float fLastUpdateTime; -#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL hsGDeviceRef* fDeviceRef; -#endif void IClearLayers(); size_t IMakeExtraLayer(); @@ -134,10 +132,8 @@ class hsGMaterial : public plSynchedObject bool IsDecal() const { return (fCompFlags & kCompDecal); } bool NeedsBlendChannel() { return (fCompFlags & kCompNeedsBlendChannel); } -#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL void SetDeviceRef(hsGDeviceRef* ref); hsGDeviceRef* GetDeviceRef() const { return fDeviceRef; } -#endif virtual void Read(hsStream* s); virtual void Write(hsStream* s); From 0705ab6565da98e0196c9986cb5c5ca096e0c81c Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 4 Nov 2023 15:04:12 -0700 Subject: [PATCH 150/165] More 
feedback implementation --- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 1 - .../ShaderSrc/FixedPipelineShaders.metal | 4 +- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 23 ++++++++ .../pfMetalPipeline/plMetalDevice.cpp | 6 +- .../pfMetalPipeline/plMetalDevice.h | 4 +- .../pfMetalPipeline/plMetalDeviceRef.h | 10 ++-- .../plMetalMaterialShaderRef.h | 16 +++--- .../pfMetalPipeline/plMetalPipeline.cpp | 55 +++++++++---------- .../pfMetalPipeline/plMetalPipeline.h | 2 +- .../pfMetalPipeline/plMetalPipelineState.cpp | 2 +- .../pfMetalPipeline/plMetalPipelineState.h | 8 ++- .../pfMetalPipeline/plMetalPlateManager.h | 14 ++--- .../PubUtilLib/plPipeline/pl3DPipeline.h | 2 +- .../PubUtilLib/plSurface/hsGMaterial.cpp | 9 +-- .../Plasma/PubUtilLib/plSurface/hsGMaterial.h | 5 +- 15 files changed, 89 insertions(+), 72 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index cd55e74749..8f3a9cb084 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -2,7 +2,6 @@ include(FetchContent) FetchContent_Declare( metalcpp - DOWNLOAD_EXTRACT_TIMESTAMP TRUE URL_HASH_SHA256 0afd87ca851465191ae4e3980aa036c7e9e02fe32e7c760ac1a74244aae6023b URL "https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip" ) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 77581983c7..267c2f5b2b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -403,9 +403,9 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 verte } // do the actual sample - if(passType == PassTypeTexture) { + if (passType == PassTypeTexture) { return 
(&textures)[index].sample((&samplers)[index], sampleCoord.xy); - } else if(passType == PassTypeCubicTexture) { + } else if (passType == PassTypeCubicTexture) { return (&cubicTextures)[index].sample((&samplers)[index], sampleCoord.xyz); } else { return half4(0); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index e0375ed9a9..241b586240 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -46,6 +46,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #ifndef __METAL_VERSION__ +#include + typedef _Float16 half; typedef __attribute__((__ext_vector_type__(2))) half half2; typedef __attribute__((__ext_vector_type__(3))) half half3; @@ -120,10 +122,16 @@ enum plMetalLayerPassType: uint8_t struct plMetalFragmentShaderArgumentBuffer { __fp16 alphaThreshold; }; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plMetalFragmentShaderArgumentBuffer must be a trivial type!"); +#endif struct plMetalShadowCastFragmentShaderArgumentBuffer { bool pointLightCast; }; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plMetalShadowCastFragmentShaderArgumentBuffer must be a trivial type!"); +#endif enum plMetalFragmentShaderTextures { FragmentShaderArgumentAttributeTextures = 0, @@ -143,12 +151,18 @@ struct plMetalShaderLightSource { __fp16 quadAtten; __fp16 scale; }; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plMetalShaderLightSource must be a trivial type!"); +#endif typedef struct { uint32_t UVWSrc; matrix_float4x4 transform; } UVOutDescriptor; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "UVOutDescriptor must be a trivial type!"); +#endif typedef struct { @@ -180,6 +194,9 @@ typedef struct half4 calcFog(float4 camPosition) constant; #endif } VertexUniforms; +#ifndef __METAL_VERSION__ 
+static_assert(std::is_trivial_v, "VertexUniforms must be a trivial type!"); +#endif #define kMetalMaxLightCount 32 @@ -187,6 +204,9 @@ typedef struct { uint8_t count; plMetalShaderLightSource lampSources[kMetalMaxLightCount]; } plMetalLights; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plMetalLights must be a trivial type!"); +#endif typedef struct { simd::float3 lightPosition; @@ -195,6 +215,9 @@ typedef struct { float power; half opacity; } plShadowState; +#ifndef __METAL_VERSION__ +static_assert(std::is_trivial_v, "plShadowState must be a trivial type!"); +#endif #endif /* ShaderTypes_h */ diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 8f14e4fc4d..94a4741f3a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -507,7 +507,7 @@ void plMetalDevice::FillVertexBufferRef(VertexBufferRef* ref, plGBufferGroup* gr const uint32_t size = group->GetVertBufferEnd(idx) * vertSize - vertStart; if (ref->GetBuffer()) { - assert(size <= ref->GetBuffer()->length()); + hsAssert(size <= ref->GetBuffer()->length(), "Allocated buffer does not fit fill data"); } if (!size) { @@ -1169,7 +1169,7 @@ void plMetalDevice::SubmitCommandBuffer() fClearDrawableDepth = 1.0; } -MTL::SamplerState* plMetalDevice::SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) +MTL::SamplerState* plMetalDevice::SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) const { return fSamplerStates[sampleState]; } @@ -1256,7 +1256,7 @@ MTL::RenderCommandEncoder* plMetalDevice::CurrentRenderCommandEncoder() return fCurrentRenderTargetCommandEncoder; } -CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() +CA::MetalDrawable* plMetalDevice::GetCurrentDrawable() const { return fCurrentDrawable; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 84767ca168..e24da29fc1 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -162,7 +162,7 @@ class plMetalDevice // Currently requires a CA drawable and not a Metal drawable. In since CA drawable is only abstract implementation I know about, not sure where we would find others? void CreateNewCommandBuffer(CA::MetalDrawable* drawable); MTL::CommandBuffer* GetCurrentCommandBuffer(); - CA::MetalDrawable* GetCurrentDrawable(); + CA::MetalDrawable* GetCurrentDrawable() const; /// Submit the command buffer to the GPU and draws all the render passes. Clears the current command buffer. void SubmitCommandBuffer(); void Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth); @@ -170,7 +170,7 @@ class plMetalDevice void SetMaxAnsiotropy(uint8_t maxAnsiotropy); void SetMSAASampleCount(uint8_t sampleCount); - MTL::SamplerState* SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState); + MTL::SamplerState* SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) const; NS::UInteger CurrentTargetSampleCount() { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index c7608d6aad..8206757594 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -61,8 +61,8 @@ class plMetalDeviceRef : public hsGDeviceRef public: void Unlink(); void Link(plMetalDeviceRef** back); - plMetalDeviceRef* GetNext() { return fNext; } - bool IsLinked() { return fBack != nullptr; } + plMetalDeviceRef* GetNext() const { return fNext; } + bool IsLinked() { return fBack != nullptr; } const bool HasFlag(uint32_t f) const { return 0 != (fFlags & f); } void SetFlag(uint32_t f, bool on) @@ -212,7 +212,7 @@ class plMetalVertexBufferRef 
: public plMetalBufferPoolRef virtual ~plMetalVertexBufferRef(); void Link(plMetalVertexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalVertexBufferRef* GetNext() const { return (plMetalVertexBufferRef*)fNext; } + plMetalVertexBufferRef* const GetNext() const { return (plMetalVertexBufferRef*)fNext; } void Release() override; }; @@ -244,7 +244,7 @@ class plMetalIndexBufferRef : public plMetalBufferPoolRef void Release() override; void Link(plMetalIndexBufferRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalIndexBufferRef* GetNext() { return (plMetalIndexBufferRef*)fNext; } + plMetalIndexBufferRef* const GetNext() { return (plMetalIndexBufferRef*)fNext; } virtual ~plMetalIndexBufferRef(); plMetalIndexBufferRef() : plMetalBufferPoolRef(), @@ -287,7 +287,7 @@ class plMetalRenderTargetRef : public plMetalTextureRef MTL::Texture* fDepthBuffer; void Link(plMetalRenderTargetRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - plMetalRenderTargetRef* GetNext() { return (plMetalRenderTargetRef*)fNext; } + plMetalRenderTargetRef* GetNext() const { return (plMetalRenderTargetRef*)fNext; } plMetalRenderTargetRef() : fDepthBuffer(nullptr) { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 84b7eea0e4..0638cfe2c8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -57,22 +57,22 @@ class plLayerInterface; class plMetalMaterialShaderRef : public plMetalDeviceRef { protected: - plMetalPipeline *fPipeline; - hsGMaterial *fMaterial; + plMetalPipeline* fPipeline; + hsGMaterial* fMaterial; // temporary holder for the fragment shader to use, we don't own this reference - MTL::Function *fFragFunction; + MTL::Function* fFragFunction; private: std::vector fPassIndices; // FIXME: This should be 
retained/released - MTL::Device *fDevice; - std::vector fPassArgumentBuffers; + MTL::Device* fDevice; + std::vector fPassArgumentBuffers; public: void Link(plMetalMaterialShaderRef **back) { plMetalDeviceRef::Link((plMetalDeviceRef **)back); } - plMetalMaterialShaderRef* GetNext() const { return (plMetalMaterialShaderRef *)fNext; } + plMetalMaterialShaderRef* GetNext() const { return (plMetalMaterialShaderRef*)fNext; } - plMetalMaterialShaderRef(hsGMaterial *mat, plMetalPipeline *pipe); + plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe); ~plMetalMaterialShaderRef(); void Release() override; @@ -116,7 +116,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef std::vector* piggybacks, const std::function& preEncodeTransform, const std::function& postEncodeTransform); - bool ICanEatLayer(plLayerInterface *lay); + bool ICanEatLayer(plLayerInterface* lay); uint32_t ILayersAtOnce(uint32_t which); void IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index d2dfeada91..e1d9664ff7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -367,7 +367,7 @@ void plMetalPipeline::ClearRenderTarget(const hsColorRGBA* col, const float* dep if (fView.fRenderState & (kRenderClearColor | kRenderClearDepth)) { hsColorRGBA clearColor = col ? *col : GetClearColor(); float clearDepth = depth ? 
*depth : fView.GetClearDepth(); - fDevice.Clear(fView.fRenderState & kRenderClearColor, {clearColor.r, clearColor.g, clearColor.b, clearColor.a}, fView.fRenderState & kRenderClearDepth, 1.0); + fDevice.Clear(fView.fRenderState & kRenderClearColor, { clearColor.r, clearColor.g, clearColor.b, clearColor.a }, fView.fRenderState & kRenderClearDepth, 1.0); fState.Reset(); } } @@ -978,11 +978,10 @@ void plMetalPipeline::GetSupportedDisplayModes(std::vector* res, */ std::vector supported; - plDisplayMode mode; - mode.Width = 800; - mode.Height = 600; - mode.ColorDepth = 32; - supported.push_back(mode); + supported.emplace_back(); + supported[0].Width = 800; + supported[0].Height = 600; + supported[0].ColorDepth = 32; *res = supported; } @@ -1361,7 +1360,7 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM // For each projector: int k; for (k = 0; k < fProjEach.size(); k++) { - // Push it's projected texture as a piggyback. + // Push its projected texture as a piggyback. plLightInfo* li = fProjEach[k]; plMetalMaterialShaderRef* mRef = static_cast(material->GetDeviceRef()); @@ -1384,7 +1383,7 @@ void plMetalPipeline::IRenderProjectionEach(const plRenderPrimFunc& render, hsGM RemoveLayerInterface(&layLightBase, false); - // Pop it's projected texture off piggyback + // Pop its projected texture off piggyback IPopProjPiggyBacks(); } } @@ -1539,11 +1538,11 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, /* The programmable pipeline doesn't do things like set the texture transform matrices, In practice, the transforms aren't set and used. Does it matter that the Metal - implementation doesn't implemention the full inputs the DX version gets? + implementation doesn't implement the full inputs the DX version gets? If it is implemented, the same checks the DX version does should be also implemented. 
DX will set texture transforms, but then turn them off in the pipeline and manually - manipulate texture co-ords in the shader. + manipulate texture coords in the shader. Texture setting should also _maybe_ be reconciled with the "fixed" pipeline. But the fixed pipeline uses indirect textures mapped to a buffer. That approach could @@ -1855,8 +1854,9 @@ void plMetalPipeline::ISetPipeConsts(plShader* shader) case plPipeConst::kPointLight3: case plPipeConst::kPointLight4: case plPipeConst::kColorFilter: - case plPipeConst::kMaxType: - break; + case plPipeConst::kMaxType: { + hsAssert(0, "Unimplemented uniform passed to shader"); + } break; } } } @@ -2532,7 +2532,7 @@ void plMetalPipeline::PopCurrentLightSources() plLayerInterface* plMetalPipeline::IPushOverBaseLayer(plLayerInterface* li) { if (!li) - return nil; + return nullptr; fOverLayerStack.emplace_back(li); @@ -2551,7 +2551,7 @@ plLayerInterface* plMetalPipeline::IPushOverBaseLayer(plLayerInterface* li) plLayerInterface* plMetalPipeline::IPopOverBaseLayer(plLayerInterface* li) { if (!li) - return nil; + return nullptr; fForceMatHandle = true; @@ -2569,7 +2569,7 @@ plLayerInterface* plMetalPipeline::IPopOverBaseLayer(plLayerInterface* li) plLayerInterface* plMetalPipeline::IPushOverAllLayer(plLayerInterface* li) { if (!li) - return nil; + return nullptr; fOverLayerStack.push_back(li); @@ -2592,7 +2592,7 @@ plLayerInterface* plMetalPipeline::IPushOverAllLayer(plLayerInterface* li) plLayerInterface* plMetalPipeline::IPopOverAllLayer(plLayerInterface* li) { if (!li) - return nil; + return nullptr; fForceMatHandle = true; @@ -2638,8 +2638,7 @@ void plMetalPipeline::IPushPiggyBacks(hsGMaterial* mat) if (fView.fRenderState & plPipeline::kRenderNoPiggyBacks) return; - int i; - for (i = 0; i < mat->GetNumPiggyBacks(); i++) { + for (int i = 0; i < mat->GetNumPiggyBacks(); i++) { if (!mat->GetPiggyBack(i)) continue; @@ -2916,7 +2915,7 @@ bool plMetalPipeline::ICreateDynDeviceObjects() // Create device-specific stuff 
fDebugTextMgr = new plDebugTextManager(); - if (fDebugTextMgr == nil) + if (fDebugTextMgr == nullptr) return true; // Vertex buffers, index buffers, textures, etc. @@ -2934,7 +2933,7 @@ void plMetalPipeline::IReleaseDynDeviceObjects() // We should do this earlier, but the textFont objects don't remove // themselves from their parent objects yet delete fDebugTextMgr; - fDebugTextMgr = nil; + fDebugTextMgr = nullptr; while (fTextFontRefList) delete fTextFontRefList; @@ -3397,10 +3396,10 @@ plRenderTarget* plMetalPipeline::IFindRenderTarget(uint32_t& width, uint32_t& he // about that. hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRenderTarget* owner) { - plMetalRenderTargetRef* ref = nil; - MTL::Texture* depthSurface = nil; - MTL::Texture* texture = nil; - MTL::Texture* cTexture = nil; + plMetalRenderTargetRef* ref = nullptr; + MTL::Texture* depthSurface = nullptr; + MTL::Texture* texture = nullptr; + MTL::Texture* cTexture = nullptr; int i; plCubicRenderTarget* cubicRT; uint16_t width, height; @@ -3435,7 +3434,7 @@ hsGDeviceRef* plMetalPipeline::SharedRenderTargetRef(plRenderTarget* share, plRe // if( !IFindRenderTargetInfo(owner, surfFormat, resType) ) //{ // hsAssert( false, "Error getting renderTarget info" ); - // return nil; + // return nullptr; //} /// Create the render target now @@ -3623,7 +3622,7 @@ void plMetalPipeline::IMakeRenderTargetPools() if (!SharedRenderTargetRef((*pool)[0], rt)) { delete rt; pool->resize(j + 1); - (*pool)[j] = nil; + (*pool)[j] = nullptr; break; } (*pool)[j] = rt; @@ -3964,7 +3963,7 @@ bool plMetalPipeline::IIsViewLeftHanded() } //// ISetCullMode ///////////////////////////////////////////////////////////// -// Tests and sets the current winding order cull mode (CW, CCW, or none). +// Tests and sets the current winding order cull mode (CW, CCW, or none). // Will reverse the cull mode as necessary for left handed camera or local to world // transforms. 
void plMetalPipeline::ISetCullMode(bool flip) @@ -3976,7 +3975,7 @@ void plMetalPipeline::ISetCullMode(bool flip) } } -plMetalDevice* plMetalPipeline::GetMetalDevice() +plMetalDevice* plMetalPipeline::GetMetalDevice() const { return &fDevice; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 07af6545f1..4e393ef16d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -151,7 +151,7 @@ class plMetalPipeline : public pl3DPipeline void IRenderAuxSpan(const plSpan& span, const plAuxSpan* aux); void IRenderAuxSpans(const plSpan& span); bool IHandleMaterialPass(hsGMaterial* material, uint32_t pass, const plSpan* currSpan, const plMetalVertexBufferRef* vRef, const bool allowShaders = true); - plMetalDevice* GetMetalDevice(); + plMetalDevice* GetMetalDevice() const; // Create and/or Refresh geometry buffers void CheckVertexBufferRef(plGBufferGroup* owner, uint32_t idx) override; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 640599a957..4f511dc026 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -269,7 +269,7 @@ MTL::Function* plMetalMaterialPassPipelineState::GetVertexFunction(MTL::Library* { NS::Error* error = nullptr; MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); - this->GetFunctionConstants(constants); + GetFunctionConstants(constants); MTL::Function* function = library->newFunction( NS::String::string("pipelineVertexShader", NS::ASCIIStringEncoding), MakeFunctionConstants(), diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 
021baa0bcc..46e879600a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -59,7 +59,8 @@ enum plMetalPipelineType ShadowRender, Clear, Dynamic, - Text + Text, + Plate }; //MARK: Base pipeline state @@ -68,6 +69,8 @@ class plMetalPipelineState { public: plMetalPipelineState(plMetalDevice* device); + virtual ~plMetalPipelineState() = default; + plMetalDevice::plMetalLinkedPipeline* GetRenderPipelineState(); void PrewarmRenderPipelineState(); bool operator==(const plMetalPipelineState& p) const @@ -90,7 +93,6 @@ class plMetalPipelineState virtual void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) = 0; virtual void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) = 0; - virtual ~plMetalPipelineState() = default; protected: plMetalDevice* fDevice; @@ -132,7 +134,7 @@ class plMetalRenderSpanPipelineState : public plMetalPipelineState MTL::FunctionConstantValues* MakeFunctionConstants() { MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); - this->GetFunctionConstants(constants); + GetFunctionConstants(constants); return constants; } }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h index 27d579302a..490ae31388 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.h @@ -58,17 +58,17 @@ class plMetalDevice; class plMetalPlatePipelineState : public plMetalPipelineState { public: - plMetalPlatePipelineState(plMetalDevice *device) : plMetalPipelineState(device){}; - bool IsEqual(const plMetalPipelineState &p) const override; - uint16_t GetID() const override { return 5; }; + plMetalPlatePipelineState(plMetalDevice* device) : plMetalPipelineState(device){}; + bool IsEqual(const plMetalPipelineState& 
p) const override; + uint16_t GetID() const override { return plMetalPipelineType::Plate; } plMetalPipelineState* Clone() override; - const MTL::Function * GetVertexFunction(MTL::Library *library) override; - const MTL::Function * GetFragmentFunction(MTL::Library *library) override; + const MTL::Function * GetVertexFunction(MTL::Library* library) override; + const MTL::Function * GetFragmentFunction(MTL::Library* library) override; const NS::String* GetDescription() override; - void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor *descriptor) override; + void ConfigureBlend(MTL::RenderPipelineColorAttachmentDescriptor* descriptor) override; - void ConfigureVertexDescriptor(MTL::VertexDescriptor *vertexDescriptor) override; + void ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) override; void GetFunctionConstants(MTL::FunctionConstantValues*) const override; }; diff --git a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h index db1f395006..4ff9486fdb 100644 --- a/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h +++ b/Sources/Plasma/PubUtilLib/plPipeline/pl3DPipeline.h @@ -110,7 +110,7 @@ template class pl3DPipeline : public plPipeline { protected: - DeviceType fDevice; + mutable DeviceType fDevice; plPipelineViewSettings fView; std::stack fViewStack; diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp index a2bafdad28..bd2c6be948 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp @@ -40,8 +40,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com *==LICENSE==*/ -#include "hsGMaterial.h" #include +#include "hsGMaterial.h" #include "HeadSpin.h" #include "plProfile.h" @@ -52,8 +52,6 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include "plMessage/plMatRefMsg.h" -#include "hsGDeviceRef.h" - plProfile_CreateTimer("MaterialAnims", "Animation", MaterialAnims); plLayer defaultLayer; @@ -207,11 +205,6 @@ void hsGMaterial::SetLayer(plLayerInterface* layer, int32_t which, bool insert, } } -void hsGMaterial::SetDeviceRef(hsGDeviceRef* ref) -{ - hsRefCnt_SafeAssign(fDeviceRef, ref); -} - void hsGMaterial::Write(hsStream* s) { s->WriteLE32(fLoadFlags); diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h index 72ee7b81c7..2f411d1b4a 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.h @@ -45,13 +45,13 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include #include "hsColorRGBA.h" +#include "hsGDeviceRef.h" #include "hsGMatState.h" #include "pnNetCommon/plSynchedObject.h" class hsScene; class hsResMgr; -class hsGDeviceRef; class hsG3DDevice; class plLayerInterface; class plLayer; @@ -132,7 +132,8 @@ class hsGMaterial : public plSynchedObject bool IsDecal() const { return (fCompFlags & kCompDecal); } bool NeedsBlendChannel() { return (fCompFlags & kCompNeedsBlendChannel); } - void SetDeviceRef(hsGDeviceRef* ref); + + void SetDeviceRef(hsGDeviceRef* ref) { hsRefCnt_SafeAssign(fDeviceRef, ref); } hsGDeviceRef* GetDeviceRef() const { return fDeviceRef; } virtual void Read(hsStream* s); From ea39b7ebd1566a8740b78a52e0c0e0cf2ec95dc8 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 5 Nov 2023 16:30:57 -0800 Subject: [PATCH 151/165] Work on sanitizing use of sizeof and buffer sizes --- .../pfMetalPipeline/plMetalDevice.cpp | 105 ++++++++++++++---- .../pfMetalPipeline/plMetalFragmentShader.cpp | 2 +- .../pfMetalPipeline/plMetalPipeline.cpp | 23 ++-- .../pfMetalPipeline/plMetalPipelineState.cpp | 8 +- .../pfMetalPipeline/plMetalPlateManager.cpp | 6 +- .../pfMetalPipeline/plMetalVertexShader.cpp | 2 +- 6 files changed, 104 insertions(+), 
42 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index 94a4741f3a..ce452f2eef 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -60,12 +60,77 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "pfMetalPipeline/plMetalPipelineState.h" #include "pfMetalPipeline/ShaderSrc/ShaderTypes.h" +/// Macros for getting/setting data in a vertex buffer +template +static inline void inlCopy(uint8_t*& src, uint8_t*& dst) +{ + T* src_ptr = reinterpret_cast(src); + T* dst_ptr = reinterpret_cast(dst); + *dst_ptr = *src_ptr; + src += sizeof(T); + dst += sizeof(T); +} + +static inline void inlCopy(const uint8_t*& src, uint8_t*& dst, size_t sz) +{ + memcpy(dst, src, sz); + src += sz; + dst += sz; +} + +template +static inline const uint8_t* inlExtract(const uint8_t* src, T* val) +{ + const T* ptr = reinterpret_cast(src); + *val = *ptr++; + return reinterpret_cast(ptr); +} + +template<> +inline const uint8_t* inlExtract(const uint8_t* src, hsPoint3* val) +{ + const float* src_ptr = reinterpret_cast(src); + float* dst_ptr = reinterpret_cast(val); + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr = 1.f; + return reinterpret_cast(src_ptr); +} + +template<> +inline const uint8_t* inlExtract(const uint8_t* src, hsVector3* val) +{ + const float* src_ptr = reinterpret_cast(src); + float* dst_ptr = reinterpret_cast(val); + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr++ = *src_ptr++; + *dst_ptr = 0.f; + return reinterpret_cast(src_ptr); +} + +template +static inline void inlSkip(uint8_t*& src) +{ + src += sizeof(T) * N; +} + +template +static inline uint8_t* inlStuff(uint8_t* dst, const T* val) +{ + T* ptr = reinterpret_cast(dst); + *ptr++ = *val; + return reinterpret_cast(ptr); +} + matrix_float4x4* hsMatrix2SIMD(const 
hsMatrix44& src, matrix_float4x4* dst) { + constexpr auto matrixSize = sizeof(matrix_float4x4); if (src.fFlags & hsMatrix44::kIsIdent) { - memcpy(dst, &matrix_identity_float4x4, sizeof(float) * 16); + memcpy(dst, &matrix_identity_float4x4, matrixSize); } else { - memcpy(dst, &src.fMap, sizeof(matrix_float4x4)); + memcpy(dst, &src.fMap, matrixSize); } return dst; @@ -170,11 +235,11 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh // we're actually drawing to screen. if (fCurrentRenderTargetCommandEncoder) { - half4 halfClearColor; - halfClearColor[0] = clearColor.r; - halfClearColor[1] = clearColor.g; - halfClearColor[2] = clearColor.b; - halfClearColor[3] = clearColor.a; + half4 halfClearColor; + halfClearColor[0] = clearColor.r; + halfClearColor[1] = clearColor.g; + halfClearColor[2] = clearColor.b; + halfClearColor[3] = clearColor.a; plMetalDevice::plMetalLinkedPipeline* linkedPipeline = plMetalClearPipelineState(this, shouldClearColor, shouldClearDepth).GetRenderPipelineState(); const MTL::RenderPipelineState* pipelineState = linkedPipeline->pipelineState; @@ -190,7 +255,7 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh CurrentRenderCommandEncoder()->setCullMode(MTL::CullModeNone); CurrentRenderCommandEncoder()->setVertexBytes(&clearCoords, sizeof(clearCoords), 0); - CurrentRenderCommandEncoder()->setFragmentBytes(&halfClearColor, sizeof(halfClearColor), 0); + CurrentRenderCommandEncoder()->setFragmentBytes(&halfClearColor, sizeof(halfClearColor), 0); CurrentRenderCommandEncoder()->setFragmentBytes(&clearDepth, sizeof(float), 1); CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } else { @@ -434,7 +499,7 @@ bool plMetalDevice::BeginRender() static uint32_t IGetBufferFormatSize(uint8_t format) { - uint32_t size = sizeof(float) * 6 + sizeof(uint32_t) * 2; // Position and normal, and two packed colors + uint32_t size = sizeof(hsPoint3) * 2 + sizeof(hsColor32)
* 2; // Position and normal, and two packed colors switch (format & plGBufferGroup::kSkinWeightMask) { case plGBufferGroup::kSkinNoWeights: @@ -446,7 +511,7 @@ static uint32_t IGetBufferFormatSize(uint8_t format) hsAssert(false, "Invalid skin weight value in IGetBufferFormatSize()"); } - size += sizeof(float) * 3 * plGBufferGroup::CalcNumUVs(format); + size += sizeof(hsPoint3) * plGBufferGroup::CalcNumUVs(format); return size; } @@ -579,26 +644,20 @@ void plMetalDevice::FillVolatileVertexBufferRef(plMetalDevice::VertexBufferRef* uint8_t* dst = ref->fData; uint8_t* src = group->GetVertBufferData(idx); - size_t uvChanSize = plGBufferGroup::CalcNumUVs(group->GetVertexFormat()) * sizeof(float) * 3; + size_t uvChanSize = plGBufferGroup::CalcNumUVs(group->GetVertexFormat()) * sizeof(hsPoint3); uint8_t numWeights = (group->GetVertexFormat() & plGBufferGroup::kSkinWeightMask) >> 4; for (uint32_t i = 0; i < ref->fCount; ++i) { - memcpy(dst, src, sizeof(hsPoint3)); // pre-pos - dst += sizeof(hsPoint3); - src += sizeof(hsPoint3); + inlCopy(src, dst); // pre-pos src += numWeights * sizeof(float); // weights if (group->GetVertexFormat() & plGBufferGroup::kSkinIndices) - src += sizeof(uint32_t); // indices - - memcpy(dst, src, sizeof(hsVector3)); // pre-normal - dst += sizeof(hsVector3); - src += sizeof(hsVector3); - - memcpy(dst, src, sizeof(uint32_t) * 2); // diffuse & specular - dst += sizeof(uint32_t) * 2; - src += sizeof(uint32_t) * 2; + inlSkip(src); // indices + + inlCopy(src, dst); // pre-normal + inlCopy(src, dst); // diffuse + inlCopy(src, dst); // specular // UVWs memcpy(dst, src, uvChanSize); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp index 8b0c886033..8daad2b76e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.cpp @@ -68,7 +68,7 @@ bool 
plMetalFragmentShader::ISetConstants(plMetalPipeline* pipe) { if (fOwner->GetNumConsts()) { float* ptr = (float*)fOwner->GetConstBasePtr(); - pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setFragmentBytes(ptr, fOwner->GetNumConsts() * sizeof(simd_float4), VertexShaderArgumentMaterialShaderUniforms); } return true; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index e1d9664ff7..2c4ca4f66c 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -864,17 +864,19 @@ bool plMetalPipeline::SetGamma(const uint16_t* const tabR, const uint16_t* const per channel data. The Metal renderer supports up to 10 bit colors - but it can subsample the texture to interpolate the colors in between what the LUT defines. 
*/ + constexpr size_t numLuts = 256; + MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); texDescriptor->setTextureType(MTL::TextureType1DArray); - texDescriptor->setWidth(256); + texDescriptor->setWidth(numLuts); texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); texDescriptor->setArrayLength(3); fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); - fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 0, tabR, 256 * sizeof(uint16_t), 0); - fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 1, tabG, 256 * sizeof(uint16_t), 0); - fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 256), 0, 2, tabB, 256 * sizeof(uint16_t), 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 0, tabR, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 1, tabG, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 2, tabB, 0, 0); return true; } @@ -893,18 +895,19 @@ bool plMetalPipeline::SetGamma10(const uint16_t* const tabR, const uint16_t* con by normalized co-ordinate - not value. So the width of the texture can vary. 
*/ + constexpr size_t numLuts = 1024; MTL::TextureDescriptor* texDescriptor = MTL::TextureDescriptor::alloc()->init()->autorelease(); texDescriptor->setTextureType(MTL::TextureType1DArray); - texDescriptor->setWidth(1024); + texDescriptor->setWidth(numLuts); texDescriptor->setPixelFormat(MTL::PixelFormatR16Uint); texDescriptor->setArrayLength(3); fDevice.fGammaLUTTexture = fDevice.fMetalDevice->newTexture(texDescriptor); - fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 0, tabR, 1024 * sizeof(uint16_t), 0); - fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 1, tabG, 1024 * sizeof(uint16_t), 0); - fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, 1024), 0, 2, tabB, 1024 * sizeof(uint16_t), 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 0, tabR, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 1, tabG, 0, 0); + fDevice.fGammaLUTTexture->replaceRegion(MTL::Region(0, numLuts), 0, 2, tabB, 0, 0); return true; } @@ -2751,9 +2754,9 @@ void plMetalPipeline::IPreprocessAvatarTextures() vertexDescriptor->attributes()->object(0)->setOffset(0); vertexDescriptor->attributes()->object(1)->setFormat(MTL::VertexFormatFloat2); vertexDescriptor->attributes()->object(1)->setBufferIndex(0); - vertexDescriptor->attributes()->object(1)->setOffset(sizeof(float) * 2); + vertexDescriptor->attributes()->object(1)->setOffset(sizeof(simd_float2)); - vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 4); + vertexDescriptor->layouts()->object(0)->setStride(sizeof(simd_float2) * 2); descriptor->setVertexDescriptor(vertexDescriptor); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 4f511dc026..9cd719ad22 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -123,14 +123,14 @@ size_t 
plMetalMaterialPassPipelineState::GetHash() const void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor) { int vertOffset = 0; - int skinWeightOffset = vertOffset + (sizeof(float) * 3); + int skinWeightOffset = vertOffset + sizeof(hsPoint3); if (fHasSkinIndices) { skinWeightOffset += sizeof(uint32_t); } int normOffset = skinWeightOffset + (sizeof(float) * this->fNumWeights); - int colorOffset = normOffset + (sizeof(float) * 3); + int colorOffset = normOffset + sizeof(hsPoint3); int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); - int stride = baseUvOffset + (sizeof(float) * 3 * this->fNumUVs); + int stride = baseUvOffset + sizeof(hsPoint3) * this->fNumUVs; vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); @@ -151,7 +151,7 @@ void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescri for (int i = 0; i < this->fNumUVs; i++) { vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setFormat(MTL::VertexFormatFloat3); vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setBufferIndex(0); - vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setOffset(baseUvOffset + (i * sizeof(float) * 3)); + vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setOffset(baseUvOffset + (i * sizeof(hsPoint3))); } vertexDescriptor->attributes()->object(VertexAttributeColor)->setFormat(MTL::VertexFormatUChar4); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 4e927dc8b5..82db658b66 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -79,7 +79,7 @@ void plMetalPlateManager::ICreateGeometry() fVtxBuffer = 
pipeline->fDevice.fMetalDevice->newBuffer(&vertexBuffer, sizeof(plateVertexBuffer), MTL::StorageModeManaged); fVtxBuffer->retain(); - idxBuffer = pipeline->fDevice.fMetalDevice->newBuffer(&indices, sizeof(uint16_t) * 6, MTL::StorageModeManaged); + idxBuffer = pipeline->fDevice.fMetalDevice->newBuffer(&indices, sizeof(indices), MTL::StorageModeManaged); } } @@ -157,8 +157,8 @@ void plMetalPlatePipelineState::ConfigureVertexDescriptor(MTL::VertexDescriptor* vertexDescriptor->attributes()->object(1)->setBufferIndex(VertexAttributeTexcoord); vertexDescriptor->attributes()->object(1)->setOffset(0); - vertexDescriptor->layouts()->object(0)->setStride(sizeof(float) * 2); - vertexDescriptor->layouts()->object(1)->setStride(sizeof(float) * 2); + vertexDescriptor->layouts()->object(0)->setStride(sizeof(simd_float2)); + vertexDescriptor->layouts()->object(1)->setStride(sizeof(simd_float2)); } void plMetalPlatePipelineState::GetFunctionConstants(MTL::FunctionConstantValues*) const diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp index 7aeedf0f3b..f2303bf89b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.cpp @@ -68,7 +68,7 @@ bool plMetalVertexShader::ISetConstants(plMetalPipeline* pipe) { if (fOwner->GetNumConsts()) { float* ptr = (float*)fOwner->GetConstBasePtr(); - pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(float) * 4, VertexShaderArgumentMaterialShaderUniforms); + pipe->GetMetalDevice()->CurrentRenderCommandEncoder()->setVertexBytes(ptr, fOwner->GetNumConsts() * sizeof(simd_float4), VertexShaderArgumentMaterialShaderUniforms); } return true; From 168277fb67e5ddbe4a6620c174939c065db105fb Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 5 Nov 2023 22:34:22 -0800 Subject: [PATCH 152/165] Removing manual include 
of headers that are already public --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 273ac98269..704ca61fcb 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -184,8 +184,6 @@ if(APPLE) set_source_files_properties(Mac-Cocoa/Assets.xcassets ${RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) - target_include_directories(plClient PRIVATE "../../FeatureLib/pfMetalPipeline/metal-cpp" - PRIVATE "../../FeatureLib/pfMetalPipeline/ShaderSrc") target_link_libraries(plClient PRIVATE "-framework MetalPerformanceShaders") install( TARGETS plClient From f65a5eb3d3202bce05b112b067c946db9d975ad0 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 5 Nov 2023 22:44:00 -0800 Subject: [PATCH 153/165] Moving MPS link to public definition in Metal pipeline --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 1 - Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index 704ca61fcb..b9edde0b2c 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -184,7 +184,6 @@ if(APPLE) set_source_files_properties(Mac-Cocoa/Assets.xcassets ${RESOURCES} PROPERTIES MACOSX_PACKAGE_LOCATION Resources ) - target_link_libraries(plClient PRIVATE "-framework MetalPerformanceShaders") install( TARGETS plClient DESTINATION client diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index 8f3a9cb084..9d286f3218 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -46,11 +46,13 @@ target_link_libraries(pfMetalPipeline pnNucleusInc 
plPipeline "-framework Metal" + "-framework MetalPerformanceShaders" PRIVATE plStatusLog INTERFACE pnFactory ) + target_include_directories(pfMetalPipeline PUBLIC ${metalcpp_SOURCE_DIR}) target_include_directories(pfMetalPipeline PUBLIC "ShaderSrc") From d662e4cda3814f1dd21af411c00b175166f05fa1 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 8 Nov 2023 17:45:30 -0800 Subject: [PATCH 154/165] Implementing more review feedback --- .../Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm | 3 +-- .../FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 10 ---------- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm index f1e34e841a..3e21430638 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/PLSServerStatus.mm @@ -65,8 +65,7 @@ - (void)loadServerStatus NSString* urlString = [NSString stringWithSTString:GetServerStatusUrl()]; NSURL* url = [NSURL URLWithString:urlString]; - if (!url || url.host == nil) - { + if (!url || !url.host) { self.serverStatusString = @""; return; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 2c4ca4f66c..a89fa96a4b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1176,16 +1176,6 @@ void plMetalPipeline::IRenderBufferSpan(const plIcicle& span, hsGDeviceRef* vb, plProfile_EndTiming(RenderBuff); -#if 1 - // Enable this for LayerAnimations, but the timing/speed seems wrong - for (size_t i = 0; i < material->GetNumLayers(); i++) { - plLayerInterface* lay = material->GetLayer(i); - if (lay) { - lay->Eval(fTime, fFrame, 0); - } - } -#endif - // Turn on this spans lights and turn off the rest. 
ISelectLights(&span, mRef); From b4899d7457d0b7bcefc987cc24f186f7c9e94f0c Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Thu, 9 Nov 2023 21:42:56 -0800 Subject: [PATCH 155/165] Shader CMake cleanup --- Sources/Plasma/Apps/plClient/CMakeLists.txt | 21 +----------------- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 20 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/CMakeLists.txt b/Sources/Plasma/Apps/plClient/CMakeLists.txt index b9edde0b2c..4da137eac5 100644 --- a/Sources/Plasma/Apps/plClient/CMakeLists.txt +++ b/Sources/Plasma/Apps/plClient/CMakeLists.txt @@ -115,27 +115,7 @@ elseif(APPLE) Mac-Cocoa/MainMenu.xib Mac-Cocoa/PLSLoginWindowController.xib Mac-Cocoa/PLSPatcherWindowController.xib - ) - #shaders need to be compiled as part of the app - #this could change in the future, but for now the Metal code expects the library to be compiled in the app - set(plClient_SHADERS - ../../FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/Clear.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal - ../../FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal ) - set_source_files_properties(${plClient_SHADERS} PROPERTIES LANGUAGE METAL) - source_group("Metal Shaders" FILES ${plClient_SHADERS}) - set(plClient_SOURCES ${plClient_SOURCES} ${plClient_SHADERS}) else() list(APPEND 
plClient_SOURCES main.cpp @@ -258,6 +238,7 @@ target_link_libraries( $<$:pfDXPipeline> $<$:pfGLPipeline> $<$:pfMetalPipeline> + $<$:pfMetalPipelineShaders> CURL::libcurl "$<$:-framework Cocoa>" "$<$:-framework QuartzCore>" diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index 9d286f3218..194097c557 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -58,3 +58,25 @@ target_include_directories(pfMetalPipeline PUBLIC "ShaderSrc") source_group("Source Files" FILES ${pfMetalPipeline_SOURCES}) source_group("Header Files" FILES ${pfMetalPipeline_HEADERS}) + +add_library( pfMetalPipelineShaders INTERFACE ) +set(pfMetalPipeline_SHADERS + ShaderSrc/FixedPipelineShaders.metal + ShaderSrc/PlateShaders.metal + ShaderSrc/BiasNormals.metal + ShaderSrc/CompCosines.metal + ShaderSrc/WaveSet7.metal + ShaderSrc/Grass.metal + ShaderSrc/WaveDecEnv.metal + ShaderSrc/Avatar.metal + ShaderSrc/WaveDec1Lay_7.metal + ShaderSrc/WaveRip.metal + ShaderSrc/Clear.metal + ShaderSrc/GammaCorrection.metal + ShaderSrc/TextFontShader.metal +) +set_source_files_properties(${pfMetalPipeline_SHADERS} TARGET_DIRECTORY plClient PROPERTIES LANGUAGE METAL) +# source group does not work with an interface library in Xcode, but maybe someday... 
+source_group("Metal Shaders" FILES ${pfMetalPipeline_SHADERS}) + +target_sources(pfMetalPipelineShaders INTERFACE ${pfMetalPipeline_SHADERS}) From da43aff18ec253f3d6b49f49e0af977dbedb3427 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 10 Nov 2023 23:17:20 -0800 Subject: [PATCH 156/165] Code feedback --- .../pfMetalPipeline/ShaderSrc/Avatar.metal | 2 +- .../ShaderSrc/BiasNormals.metal | 10 +- .../ShaderSrc/CompCosines.metal | 16 ++-- .../ShaderSrc/FixedPipelineShaders.metal | 91 +++++++++---------- .../pfMetalPipeline/ShaderSrc/Grass.metal | 22 ++--- .../pfMetalPipeline/ShaderSrc/ShaderTypes.h | 36 +++++--- .../ShaderSrc/WaveDec1Lay_7.metal | 34 +++---- .../ShaderSrc/WaveDecEnv.metal | 76 ++++++++-------- .../pfMetalPipeline/ShaderSrc/WaveRip.metal | 42 ++++----- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 76 ++++++++-------- .../pfMetalPipeline/plMetalDevice.h | 4 +- .../pfMetalPipeline/plMetalFragmentShader.h | 2 +- .../plMetalMaterialShaderRef.cpp | 39 ++++---- .../plMetalMaterialShaderRef.h | 24 ++--- .../pfMetalPipeline/plMetalPipeline.cpp | 30 +++--- .../pfMetalPipeline/plMetalPipelineState.cpp | 9 +- .../pfMetalPipeline/plMetalPipelineState.h | 6 +- .../pfMetalPipeline/plMetalShader.h | 2 +- .../pfMetalPipeline/plMetalTextFont.h | 4 +- .../pfMetalPipeline/plMetalVertexShader.h | 4 +- 20 files changed, 268 insertions(+), 261 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal index 49e7c62b48..f6626e2e03 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal @@ -70,6 +70,6 @@ fragment half4 PreprocessAvatarFragmentShader(PreprocessAvatarTexturesInOut in [ address::clamp_to_zero); half4 colorSample = layer.sample(colorSampler, in.uvPosition.xy) * half4(blendColor); - + return colorSample; } diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal index b31778c128..c9421562a3 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal @@ -69,23 +69,23 @@ typedef struct { vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[stage_in]], constant vs_BiasNormalsUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_BiasNormalsOut out; - + out.position = float4(in.position, 1.0); - + out.texCoord0 = float4( dot(float4(in.texCoord1, 1.0), uniforms.TexU0), dot(float4(in.texCoord1, 1.0), uniforms.TexV0), 0, 1 ); - + out.texCoord1 = float4( dot(float4(in.texCoord1, 1.0), uniforms.TexU1), dot(float4(in.texCoord1, 1.0), uniforms.TexV1), 0, 1 ); - + out.color1 = uniforms.ScaleBias.xxzz; out.color2 = uniforms.ScaleBias.yyzz; @@ -109,7 +109,7 @@ fragment float4 ps_BiasNormals(vs_BiasNormalsOut in [[stage_in]], // // So c[0].z = 1, but all other c[i].z = 0 // Note also the c4 used for biasing back at the end. 
- + constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal index a61df286ad..aedd304976 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal @@ -45,14 +45,14 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct { float4 c0; float4 c1; float4 c2; float4 c3; float4 c4; } vs_CompCosinesUniforms; - + typedef struct { float4 position [[position]]; float4 texCoord0; @@ -64,9 +64,9 @@ typedef struct { vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[stage_in]], constant vs_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_CompCosinesnInOut out; - + out.position = float4(in.position, 1.0); - + float4 texCoord = float4(0, 0, 0, 1); texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c0); out.texCoord0 = texCoord; @@ -76,10 +76,10 @@ vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[stage_in]], out.texCoord2 = texCoord; texCoord.x = dot(float4(in.texCoord1, 1.0), uniforms.c3); out.texCoord3 = texCoord; - + return out; } - + typedef struct { float4 c0; float4 c1; @@ -109,12 +109,12 @@ fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[stage_in]], // // So c[0].z = 1, but all other c[i].z = 0 // Note also the c4 used for biasing back at the end. 
- + constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, address::repeat); - + float4 out = 2 * (t0.sample(colorSampler, fract(in.texCoord0.xy)) - 0.5) * uniforms.c0; out += 2 * (t1.sample(colorSampler, fract(in.texCoord1.xy)) - 0.5) * uniforms.c1; out += 2 * (t2.sample(colorSampler, fract(in.texCoord2.xy)) - 0.5) * uniforms.c2; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 267c2f5b2b..3707a237ff 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -57,7 +57,7 @@ using namespace metal; #define GMAT_STATE_ENUM_END(name) }; #include "hsGMatStateEnums.h" - + enum plUVWSrcModifiers: uint32_t{ kUVWPassThru = 0x00000000, kUVWIdxMask = 0x0000ffff, @@ -67,7 +67,7 @@ enum plUVWSrcModifiers: uint32_t{ }; using namespace metal; - + constant const uint8_t sourceType1 [[ function_constant(FunctionConstantSources + 0) ]]; constant const uint8_t sourceType2 [[ function_constant(FunctionConstantSources + 1) ]]; constant const uint8_t sourceType3 [[ function_constant(FunctionConstantSources + 2) ]]; @@ -163,8 +163,8 @@ typedef struct half4 vtxColor [[ centroid_perspective ]]; half4 fogColor; } ColorInOut; - - + + typedef struct { float4 position [[position, invariant]]; @@ -179,7 +179,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], ColorInOut out; // we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. 
const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.0f); - + const half3 MAmbient = mix(inColor.rgb, uniforms.ambientCol, uniforms.ambientSrc); const half4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); const half3 MEmissive = mix(inColor.rgb, uniforms.emissiveCol, uniforms.emissiveSrc); @@ -188,7 +188,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], half3 LDiffuse = half3(0.0, 0.0, 0.0); const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; - + float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); if (temp_hasOnlyWeight1) { const float4 position2 = blendMatrix1 * float4(in.position, 1.0); @@ -199,7 +199,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], constant const plMetalShaderLightSource *lightSource = &lights.lampSources[i]; if (lightSource->scale == 0.0h) continue; - + // direction.w is attenuation float4 direction; @@ -243,13 +243,13 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], // Fog out.fogColor = uniforms.calcFog(vCamPosition); - + const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; - + for (size_t layer=0; layer texture [[ texture(16) ]], constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(FragmentShaderArgumentShadowCastUniforms) ]], @@ -667,35 +662,35 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], const sampler colorSample = sampler( mag_filter::linear, min_filter::linear, address::clamp_to_edge); - + half4 currentColor = texture.sample(colorSample, sampleCoords.xy); currentColor.rgb *= in.vtxColor.rgb; - + const float2 LUTCoords = in.texCoord2.xy; const half4 LUTColor = clamp(half4(LUTCoords.x), 0.0h, 1.0h);; - + currentColor.rgb = (1.0h - LUTColor.rgb) * currentColor.rgb; currentColor.a = LUTColor.a - currentColor.a; - + // only possible alpha sources are layers 0 
or 1 if (alphaSrc == 0 && passCount > 0) { - + half4 layerColor = layers.sampleLayer(0, in.vtxColor,sourceTypes[0], in.texCoord3); - + currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; } else if (alphaSrc == 1 && passCount > 1) { - + half4 layerColor = layers.sampleLayer(1, in.vtxColor, sourceTypes[1], in.texCoord3); - + currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; } - + //a lpha blend goes here - + if (currentColor.a <= 0.0h) discard_fragment(); - + return currentColor; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal index c3beb37f24..f5e5470b13 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal @@ -35,39 +35,39 @@ typedef struct { vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], constant vs_GrassUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_GrassInOut out; - + float4 r0 = (in.position.x * uniforms.waveDirX) + (in.position.y * uniforms.waveDirX); - + r0 += (uniforms.time.x * uniforms.waveSpeed); // scale by speed and add to X,Y input r0 = fract(r0); - + r0 = (r0 - 0.5) * M_PI_F * 2; - + float4 pow2 = r0 * r0; float4 pow3 = pow2 * r0; float4 pow5 = pow2 * pow3; float4 pow7 = pow2 * pow5; float4 pow9 = pow2 * pow7; - + r0 += pow3 * uniforms.sinConstants.x; r0 += pow5 * uniforms.sinConstants.y; r0 += pow7 * uniforms.sinConstants.z; r0 += pow9 * uniforms.sinConstants.w; - + float3 offset = float3( dot(r0, uniforms.waveDistortX), dot(r0, uniforms.waveDistortY), dot(r0, uniforms.waveDistortZ) ); - + offset *= (2.0 * (1.0 - in.texCoord1.y)); // mult by Y tex coord. 
So the waves only affect the top verts - + float4 position = float4(in.position.xyz + offset, 1); out.position = position * uniforms.Local2NDC; - + out.color = float4(in.color.r, in.color.g, in.color.b, in.color.a) / 255.0; out.texCoord = float4(in.texCoord1, 0.0); - + return out; } @@ -77,7 +77,7 @@ fragment half4 ps_GrassShader(vs_GrassInOut in [[stage_in]], mag_filter::linear, min_filter::linear, address::repeat); - + half4 out = t0.sample(colorSampler, in.texCoord.xy); out *= half4(in.color); if (out.a <= 0.1) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h index 241b586240..2845921eb8 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/ShaderTypes.h @@ -119,27 +119,31 @@ enum plMetalLayerPassType: uint8_t PassTypeColor = 3 }; -struct plMetalFragmentShaderArgumentBuffer { +struct plMetalFragmentShaderArgumentBuffer +{ __fp16 alphaThreshold; }; #ifndef __METAL_VERSION__ static_assert(std::is_trivial_v, "plMetalFragmentShaderArgumentBuffer must be a trivial type!"); #endif -struct plMetalShadowCastFragmentShaderArgumentBuffer { +struct plMetalShadowCastFragmentShaderArgumentBuffer +{ bool pointLightCast; }; #ifndef __METAL_VERSION__ static_assert(std::is_trivial_v, "plMetalShadowCastFragmentShaderArgumentBuffer must be a trivial type!"); #endif -enum plMetalFragmentShaderTextures { +enum plMetalFragmentShaderTextures +{ FragmentShaderArgumentAttributeTextures = 0, FragmentShaderArgumentAttributeCubicTextures = 8, FragmentShaderArgumentAttributeUniforms = 32 }; -struct plMetalShaderLightSource { +struct plMetalShaderLightSource +{ simd::float4 position; half4 ambient; half4 diffuse; @@ -155,23 +159,23 @@ struct plMetalShaderLightSource { static_assert(std::is_trivial_v, "plMetalShaderLightSource must be a trivial type!"); #endif -typedef struct +struct UVOutDescriptor { uint32_t 
UVWSrc; matrix_float4x4 transform; -} UVOutDescriptor; +}; #ifndef __METAL_VERSION__ static_assert(std::is_trivial_v, "UVOutDescriptor must be a trivial type!"); #endif -typedef struct +struct VertexUniforms { // transformation matrix_float4x4 projectionMatrix; matrix_float4x4 localToWorldMatrix; matrix_float4x4 cameraToWorldMatrix; matrix_float4x4 worldToCameraMatrix; - + // lighting half4 globalAmb; half3 ambientCol; @@ -183,38 +187,40 @@ typedef struct half3 specularCol; uint8_t specularSrc; bool invVtxAlpha; - + uint8_t fogExponential; simd::float2 fogValues; half3 fogColor; - + UVOutDescriptor uvTransforms[8]; #ifdef __METAL_VERSION__ float3 sampleLocation(size_t index, thread float3 *texCoords, const float4 normal, const float4 camPosition) constant; half4 calcFog(float4 camPosition) constant; #endif -} VertexUniforms; +}; #ifndef __METAL_VERSION__ static_assert(std::is_trivial_v, "VertexUniforms must be a trivial type!"); #endif #define kMetalMaxLightCount 32 -typedef struct { +struct plMetalLights +{ uint8_t count; plMetalShaderLightSource lampSources[kMetalMaxLightCount]; -} plMetalLights; +}; #ifndef __METAL_VERSION__ static_assert(std::is_trivial_v, "plMetalLights must be a trivial type!"); #endif -typedef struct { +struct plShadowState +{ simd::float3 lightPosition; simd::float3 lightDirection; bool directional; float power; half opacity; -} plShadowState; +}; #ifndef __METAL_VERSION__ static_assert(std::is_trivial_v, "plShadowState must be a trivial type!"); #endif diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal index 58f369cd23..c190044888 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -101,7 +101,7 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], worldPosition.z = dot(float4(in.position, 1.0), 
uniforms.L2WRow2); // Fill out our w (m4x3 doesn't touch w). worldPosition.w = 1.0; - + // // Input diffuse v5 color is: @@ -152,11 +152,11 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], // Dot our position with our direction vectors. float4 distance = uniforms.DirectionX * worldPosition.xxxx; distance += uniforms.DirectionY * worldPosition.yyyy; - + // // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); distance = (distance * uniforms.Frequency) + uniforms.Phase; - + // // Now we need dist mod'd into range [-Pi..Pi] // dist *= rcp(kTwoPi); distance += uniforms.PiConsts.zzzz; @@ -167,26 +167,26 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], distance *= (2.0f * M_PI_F); // dist += -kPi; distance += -M_PI_F; - + // // sincos(dist, sinDist, cosDist); // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z - + float4 pow2 = distance * distance; // r0^2 float4 pow3 = pow2 * distance; // r0^3 - probably stall float4 pow4 = pow2 * pow2; // r0^4 float4 pow5 = pow2 * pow3; // r0^5 float4 pow7 = pow2 * pow5; // r0^7 - + //r1 float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; //r2 float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; - + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; - + // Calc our depth based filtering here into r4 (because we don't use it again // after here, and we need our filtering shortly). float4 depth = uniforms.WaterLevel - worldPosition.zzzz; @@ -194,13 +194,13 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], depth += uniforms.MinAtten; // Clamp .xyz to range [0..1] depth = clamp(depth, 0, 1); - + // Calc our filter (see above). 
float4 inColor = float4(in.color) / 255.0f; float4 filter = inColor.wwww * uniforms.Lengths; filter = max(filter, uniforms.NumericConsts.xxxx); filter = min(filter, uniforms.NumericConsts.zzzz); - + //mov r2, r1; // r2 == sinDist // r1 == cosDist @@ -237,7 +237,7 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], dot(cosDist, uniforms.QADirX), dot(cosDist, uniforms.QADirY) ); - + // Bias our vert up a bit to compensate for precision errors. // In particular, our filter coefficients are coming in as // interpolated bytes, so there's bound to be a lot of slop @@ -247,32 +247,32 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], // actually moving it, but this is easier and might work just // as well. worldPosition.z += uniforms.Bias.x; - + // // // Transform position to screen // // out.position = worldPosition * uniforms.WorldToNDC; out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; - + // Output color is vertex green // Output alpha is vertex red (vtx alpha is used for wave filtering) // Whole thing modulated by material color/opacity. 
- + out.c0 = half4(in.color.yyyz)/255.0 * half4(uniforms.MatColor); - + // Usual texture transform out.texCoord0.x = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row0); out.texCoord0.y = dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1); out.texCoord0.z = 0.0f; out.texCoord0.w = 0.0f; - + return out; } fragment half4 ps_CbaseAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], texture2d texture [[ texture(0) ]]) { - + constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index 78be7563fd..6630e39565 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -62,7 +62,7 @@ typedef struct { vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_WaveDecEnv7InOut out; - + // Store our input position in world space in r6 float4 worldPosition = float4(0); worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); @@ -70,7 +70,7 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); // Fill out our w (m4x3 doesn't touch w). worldPosition.w = 1.0; - + // // Input diffuse v5 color is: @@ -121,42 +121,42 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // Dot our position with our direction vectors. 
float4 distance = uniforms.DirectionX * worldPosition.xxxx; distance += uniforms.DirectionY * worldPosition.yyyy; - + // // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); distance = (distance * uniforms.Frequency) + uniforms.Phase; - + // // Now we need dist mod'd into range [-Pi..Pi] // dist *= rcp(kTwoPi); distance += uniforms.PiConsts.zzzz; distance *= 1.0f / uniforms.PiConsts.wwww; - + // dist = frac(dist); distance = fract(distance); // dist *= kTwoPi; distance *= uniforms.PiConsts.wwww; // dist += -kPi; distance -= uniforms.PiConsts.zzzz; - + // // sincos(dist, sinDist, cosDist); // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z - + float4 pow2 = distance * distance; // r0^2 float4 pow3 = pow2 * distance; // r0^3 - probably stall float4 pow4 = pow2 * pow2; // r0^4 float4 pow5 = pow2 * pow3; // r0^5 float4 pow7 = pow2 * pow5; // r0^7 - + //r1 float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; //r2 float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; - + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; - + // Calc our depth based filtering here into r4 (because we don't use it again // after here, and we need our filtering shortly). float4 depth = uniforms.WaterLevel - worldPosition.zzzz; @@ -164,13 +164,13 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], depth += uniforms.MinAtten; // Clamp .xyz to range [0..1] depth = clamp(depth, 0, 1); - + // Calc our filter (see above). 
float4 inColor = float4(in.color) / 255.0f; float4 filter = inColor.wwww * uniforms.Lengths; filter = max(filter, uniforms.NumericConsts.xxxx); filter = min(filter, uniforms.NumericConsts.zzzz); - + //mov r2, r1; // r2 == sinDist // r1 == cosDist @@ -204,7 +204,7 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], dot(cosDist, uniforms.QADirX), dot(cosDist, uniforms.QADirY) ); - + // Bias our vert up a bit to compensate for precision errors. // In particular, our filter coefficients are coming in as // interpolated bytes, so there's bound to be a lot of slop @@ -214,14 +214,14 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // actually moving it, but this is easier and might work just // as well. worldPosition.z += uniforms.Bias.x; - + // // // Transform position to screen // // out.position = worldPosition * uniforms.WorldToNDC; out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; - + // Now onto texture coordinate generation. // // First is the usual texture transform @@ -230,7 +230,7 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], dot(float4(in.texCoord1, 1.0), uniforms.Tex0_Row1), uniforms.NumericConsts.zz ); - + // Calculate our basis vectors as input into our tex3x3vspec // First we get our basis set off our surface. This is // Okay, here we go: @@ -281,47 +281,47 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // |r9 dot v8, r9 dot v9, r9 dot r5| // // We will need r5 as v8 X v9 - + float4 r7 = float4(in.texCoord2, 1.0); float4 r5 = float4(0); r5.xyz = r7.yzx * in.texCoord3.zxy; r5.xyz = (r7.zxy * -in.texCoord3.yzx) + r5.xyz; - + // Okay, r1 currently has the vector of cosines, and r2 has vector of sines. // Everything will want that times amplitude, so go ahead and fold that in. 
cosDist *= uniforms.Amplitude; - + r7.x = dot(sinDist, -uniforms.DirXSqKW); r7.y = dot(sinDist, -uniforms.DirXDirYKW); r7.z = dot(cosDist, -uniforms.DirXW); r7.x += uniforms.NumericConsts.z; - + float4 r8 = float4(0); r8.x = dot(sinDist, -uniforms.DirXDirYKW); r8.y = dot(sinDist, -uniforms.DirYSqKW); r8.z = dot(cosDist, -uniforms.DirYW); r8.y = r8.y + uniforms.NumericConsts.z; - + float4 r9 = out.position; r9.z = dot(cosDist, -uniforms.WK); r9.x = -r7.z; r9.y = -r8.z; r9.z = r9.z + uniforms.NumericConsts.z; - + // Okay, got everything we need, construct r1-3 as surface2world*texture2surface. float4 r1, r2, r3 = float4(0); r1.x = dot(r7.xyz, in.texCoord2); r1.y = dot(r7.xyz, in.texCoord3); r1.z = dot(r7.xyz, r5.xyz); - + r2.x = dot(r8.xyz, in.texCoord2); r2.y = dot(r8.xyz, in.texCoord3); r2.z = dot(r8.xyz, r5.xyz); - + r3.x = dot(r9.xyz, in.texCoord2); r3.y = dot(r9.xyz, in.texCoord3); r3.z = dot(r9.xyz, r5.xyz); - + // Following section is debug only to skip the per-vert tangent space axes. //add r1, c13.zxxx, r7.zzxw; //add r2, c13.xzxx, r7.zzyw; @@ -333,21 +333,21 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], // See vs_WaveFixedFin6.inl for derivation of the following float4 r0 = worldPosition - uniforms.CameraPos; r0 *= rsqrt(dot(r0.xyz, r0.xyz)); - + float4 r10 = float4(0); r10.x = dot(r0.xyz, uniforms.EnvAdjust.xyz); r10.y = (r10.x * r10.x) - uniforms.EnvAdjust.w; - + r10.z = (r10.y * rsqrt(r10.y)) + r10.x; r0.xyz = (r0.xyz * r10.zzz) - uniforms.EnvAdjust.xyz; - + // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. r0.xyz = normalize(r0.xyz); - + r1.w = -r0.x; r2.w = -r0.y; r3.w = -r0.z; - + // Now r1-r3 are texture2world, with the eye-ray vector in .w. We just // need to normalize them and bung them into output UV's 1-3. 
// Note we're accounting for our environment map being flipped from @@ -355,16 +355,16 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], r10.w = uniforms.NumericConsts.z; r10.x = rsqrt(dot(r1.xyz, r1.xyz)); out.texCoord1 = r1 * r10.xxxw; - + r10.x = rsqrt(dot(r3.xyz, r3.xyz)); out.texCoord2 = r3 * r10.xxxw; - + r10.x = rsqrt(dot(r2.xyz, r2.xyz)); out.texCoord3 = r2 * r10.xxxw; - + float4 matColor = uniforms.MatColor; out.c1 = clamp(float4(in.color).yyyz/255.0 * matColor, 0.0, 1.0); - + return out; } @@ -373,7 +373,7 @@ fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]], texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1) ]]) { // Very simular to ps_WaveFixed.inl. Only the final coloring is different. // Even though so far they are identical. - + constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, @@ -382,13 +382,13 @@ fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[stage_in]], float u = dot(in.texCoord1.xyz, t0.xyz); float v = dot(in.texCoord2.xyz, t0.xyz); float w = dot(in.texCoord3.xyz, t0.xyz); - + float3 N = float3(u, v, w); float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w); - + //float3 coord = reflect(E, N); float3 coord = 2*(dot(N, E) / dot(N, N))*N - E; - + // t3 now has our reflected environment map value // We've (presumably) attenuated the effect on a vertex basis // and have our color w/ attenuated alpha in v0. 
So all we need diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal index 73ee6a44b2..83885af0a7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal @@ -91,7 +91,7 @@ typedef struct { vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], constant vs_WaveRip7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { waveRipInOut out; - + // Store our input position in world space in r6 float4 worldPosition = float4(0); worldPosition.x = dot(float4(in.position, 1.0), uniforms.L2WRow0); @@ -99,7 +99,7 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], worldPosition.z = dot(float4(in.position, 1.0), uniforms.L2WRow2); // Fill out our w (m4x3 doesn't touch w). worldPosition.w = 1.0; - + // // Input diffuse v5 color is: @@ -150,42 +150,42 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], // Dot our position with our direction vectors. 
float4 distance = uniforms.DirectionX * worldPosition.xxxx; distance += uniforms.DirectionY * worldPosition.yyyy; - + // // dist = mad( dist, kFreq.xyzw, kPhase.xyzw); distance = (distance * uniforms.Frequency) + uniforms.Phase; - + // // Now we need dist mod'd into range [-Pi..Pi] // dist *= rcp(kTwoPi); distance += uniforms.PiConsts.zzzz; distance *= 1.0f / uniforms.PiConsts.wwww; - + // dist = frac(dist); distance = fract(distance); // dist *= kTwoPi; distance *= uniforms.PiConsts.wwww; // dist += -kPi; distance -= uniforms.PiConsts.zzzz; - + // // sincos(dist, sinDist, cosDist); // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z // cos = 1 + r0^2 * vCos.y + r0^4 * vCos.z - + float4 pow2 = distance * distance; // r0^2 float4 pow3 = pow2 * distance; // r0^3 - probably stall float4 pow4 = pow2 * pow2; // r0^4 float4 pow5 = pow2 * pow3; // r0^5 float4 pow7 = pow2 * pow5; // r0^7 - + //r1 float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; //r2 float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; - + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; - + // Calc our depth based filtering here into r4 (because we don't use it again // after here, and we need our filtering shortly). float4 depth = uniforms.WaterLevel - worldPosition.zzzz; @@ -193,12 +193,12 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], depth += uniforms.MinAtten; // Clamp .xyz to range [0..1] depth = clamp(depth, 0, 1); - + // Calc our filter (see above). float4 inColor = float4(in.color) / 255.0f; float4 filter = inColor.wwww * uniforms.Lengths; filter = clamp(filter, 0.0f, 1.0f); - + //mov r2, r1; // r2 == sinDist // r1 == cosDist @@ -232,7 +232,7 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], dot(cosDist, uniforms.QADirX), dot(cosDist, uniforms.QADirY) ); - + // Bias our vert up a bit to compensate for precision errors. 
// In particular, our filter coefficients are coming in as // interpolated bytes, so there's bound to be a lot of slop @@ -242,14 +242,14 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], // actually moving it, but this is easier and might work just // as well. worldPosition.z += uniforms.RampBias.z; - + // // // Transform position to screen // // out.position = worldPosition * uniforms.WorldToNDC; out.fog = (out.position.w + uniforms.FogSet.x) * uniforms.FogSet.y; - + // Dyna Stuff // Constants // c33 = fC1U, fC2U, fC1V, fC2V @@ -260,7 +260,7 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], // v7.z = fBirth (because we don't use it for anything else). // // Initialize r1.zw to 0,1 - + // Calc r1.x = age, r1.y = atten // age = t - birth. const float age = uniforms.LifeConsts.y - in.texCoord1.z; @@ -268,13 +268,13 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], // first clamp0_1(age/ramp) const float atten = clamp(age * uniforms.RampBias.y, 0.0f, 1.0f) * clamp((uniforms.LifeConsts.z - age) * uniforms.LifeConsts.w, 0.0f, 1.0f); - + // color is (atten, atten, atten, 1.f) // Need to calculate opacity we would have had from vs_WaveFixedFin7.inl // Right now that's just modulating by r4.y. 
- + out.c1 = (depth.y * uniforms.LifeConsts.x) * half4(atten, atten, atten, 1.0h); - + // UVW = (inUVW - 0.5) * scale + 0.5 // where: // scale = (fC1U / (age * fC2U + 1.f)), fC1V / (age * fC2U + 1.f), 1.f, 1.f @@ -285,7 +285,7 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], out.texCoord0 = in.texCoord1.xy - 0.5f; out.texCoord0 *= scale.xy; out.texCoord0 += 0.5f; - + return out; } @@ -296,6 +296,6 @@ fragment half4 ps_WaveRip(waveRipInOut in [[stage_in]], min_filter::linear, address::clamp_to_edge); half4 t0 = texture.sample(colorSampler, in.texCoord0.xy); - + return t0 * in.c1; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index 04998c3d8e..f56e600f8e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -81,7 +81,7 @@ typedef struct { float4 DirXDirYKW; float4 DirYSqKW; } vs_WaveFixedFin7Uniforms; - + typedef struct { float4 position [[position]]; float4 c1; @@ -96,21 +96,21 @@ typedef struct { vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { vs_WaveFixedFin7InOut out; - + // Store our input position in world space in r6 float3 column1 = float3(uniforms.LocalToWorldRow1[0], uniforms.LocalToWorldRow2[0], uniforms.LocalToWorldRow3[0]); float3 column2 = float3(uniforms.LocalToWorldRow1[1], uniforms.LocalToWorldRow2[1], uniforms.LocalToWorldRow3[1]); float3 column3 = float3(uniforms.LocalToWorldRow1[2], uniforms.LocalToWorldRow2[2], uniforms.LocalToWorldRow3[2]); float3 column4 = float3(uniforms.LocalToWorldRow1[3], uniforms.LocalToWorldRow2[3], uniforms.LocalToWorldRow3[3]); - + matrix_float4x3 localToWorld; localToWorld[0] = column1; localToWorld[1] = column2; localToWorld[2] = column3; localToWorld[3] = column4; - + float4 
worldPosition = float4(localToWorld * float4(in.position, 1.0), 1.0); - + // // Input diffuse v5 color is: @@ -179,16 +179,16 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], distance *= uniforms.PiConsts.wwww; // dist += -kPi; distance -= uniforms.PiConsts.zzzz; - + //Metals pow function does not like negative bases //Doing the same thing as the DX assembly until I know more about why - + float4 pow2 = distance * distance; // r0^2 float4 pow3 = pow2 * distance; // r0^3 - probably stall float4 pow4 = pow2 * pow2; // r0^4 float4 pow5 = pow2 * pow3; // r0^5 float4 pow7 = pow2 * pow5; // r0^7 - + // // sincos(dist, sinDist, cosDist); // sin = r0 + r0^3 * vSin.y + r0^5 * vSin.z @@ -197,11 +197,11 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], float4 cosDist = 1 + pow2 * uniforms.CosConsts.y + pow4 * uniforms.CosConsts.z; //r2 float4 sinDist = distance + pow3 * uniforms.SinConsts.y + pow5 * uniforms.SinConsts.z; - + cosDist = ((pow3 * pow3) * uniforms.CosConsts.w) + cosDist; sinDist = (pow7 * uniforms.SinConsts.w) + sinDist; - - + + // Calc our depth based filtering here into r4 (because we don't use it again // after here, and we need our filtering shortly). float4 depth = uniforms.WaterLevel - worldPosition.zzzz; @@ -209,13 +209,13 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], depth += uniforms.MinAtten; // Clamp .xyz to range [0..1] depth = clamp(depth, 0, 1); - + // Calc our filter (see above). 
float4 inColor = float4(in.color) / 255.0f; float4 filter = inColor.wwww * uniforms.Lengths; filter = max(filter, uniforms.NumericConsts.xxxx); filter = min(filter, uniforms.NumericConsts.zzzz); - + //mov r2, r1; // r2 == sinDist // r1 == cosDist @@ -269,27 +269,27 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], // Nor = (-P, -N, 1 - Q) // // But we want the transpose of that to go into r1-r3 - + worldPosition.x += dot(cosDist, uniforms.DirXK); worldPosition.y += dot(cosDist, uniforms.DirYK); - + float4 r1, r2, r3 = 0; - + r1.x = dot(sinDist, -uniforms.DirXSqKW); r2.x = dot(sinDist, -uniforms.DirXDirYKW); r3.x = dot(cosDist, uniforms.DirXW); r1.x = r1.x + uniforms.NumericConsts.z; - + r1.y = dot(sinDist, -uniforms.DirXDirYKW); r2.y = dot(sinDist, -uniforms.DirYSqKW); r3.y = dot(cosDist, uniforms.DirYW); r2.y = r2.y + uniforms.NumericConsts.z; - + r1.z = dot(cosDist, -uniforms.DirXW); r2.z = dot(cosDist, -uniforms.DirYW); r3.z = dot(sinDist, -uniforms.WK); r3.z = r3.z + uniforms.NumericConsts.z; - + // Calculate our normalized vector from camera to vtx. // We'll use that a couple of times coming up. float4 r5 = worldPosition - uniforms.CameraPos; @@ -297,7 +297,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], r10.x = rsqrt(dot(r5.xyz, r5.xyz)); r5 = r5 * r10.xxxx; r5.w = 1.0 / r10.x; - + // Calculate our specular attenuation from and into r5.w. // r5.w starts off the distance from vtx to camera. 
// Once we've turned it into an attenuation factor, we @@ -312,7 +312,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], r5.w = max(r5.w, uniforms.NumericConsts.x); r5.w *= r5.w; // Square it to account for perspective r5.w *= uniforms.SpecAtten.z; - + // So, our "finitized" eyeray is: // camPos + D * t - envCenter = D * t - (envCenter - camPos) // with @@ -341,10 +341,10 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], // = r0 * r10.zzzz - F; // //https://developer.download.nvidia.com/books/HTML/gpugems/gpugems_ch01.html - - + + float4 r0 = float4(0); - + { float3 D = r5.xyz; float3 F = uniforms.EnvAdjust.xyz; @@ -353,30 +353,30 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], float3 t = dot(D.xyz, F.xyz) + sqrt(abs(pow(abs(dot(D.xyz, F.xyz)), 2) - G));// r10.z = D dot F + SQRT((D dot F)^2 - G) r0.xyz = (D * t) - F; // r0.xyz = D * t - (envCenter - camPos) } - + // ATI 9000 is having trouble with eyeVec as computed. Normalizing seems to get it over the hump. 
r0.xyz = normalize(r0.xyz); - + r1.w = -r0.x; r2.w = -r0.y; r3.w = -r0.z; - + r0.zw = uniforms.NumericConsts.xz; - + float4 r11 = float4(0); - + r0.x = dot(r1.xyz, r1.xyz); r0.xy = rsqrt(r0.x); r0.x *= r5.w; out.texCoord1 = r1 * r0.xxyw; r11.x = r1.z * r0.y; - + r0.x = dot(r2.xyz, r2.xyz); r0.xy = rsqrt(r0.x); r0.x *= r5.w; out.texCoord3 = r2 * r0.xxyw; r11.y = r2.z * r0.y; - + r0.x = dot(r3.xyz, r3.xyz); r0.xy = rsqrt(r0.x); r0.x *= r5.w; @@ -412,11 +412,11 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], r10.x = r9.w + uniforms.FogSet.x; out.fog = r10.x * uniforms.FogSet.y; out.position = r9; - + // Transform our uvw out.texCoord0 = float4(in.position.xy * uniforms.UVScale.x, 0, 1); - + // Questionble attenuation follows // vector from this point to camera and normalize stashed in r5 // Dot that with the computed normal @@ -433,7 +433,7 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], r1.w *= uniforms.WaterTint.w; out.c1 = clamp(r1 * uniforms.EnvTint, 0, 1); out.c2 = uniforms.WaterTint; // SEENORM - + return out; } @@ -455,7 +455,7 @@ fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]], // Since environment map has alpha = 255, the output of this // shader can be used for either alpha or additive blending, // as long as v0 is fed in appropriately. 
- + constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, @@ -464,13 +464,13 @@ fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]], float u = dot(in.texCoord1.xyz, t0); float v = dot(in.texCoord2.xyz, t0); float w = dot(in.texCoord3.xyz, t0); - + float3 N = float3(u, v, w); float3 E = float3(in.texCoord1.w, in.texCoord2.w, in.texCoord3.w); - + //float3 coord = reflect(E, N); float3 coord = 2*(dot(N, E) / dot(N, N))*N - E; - + float4 out = float4(environmentMap.sample(colorSampler, coord)); out = (out * in.c1) + in.c2; out.a = in.c1.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index e24da29fc1..1998fcc4dd 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -172,7 +172,7 @@ class plMetalDevice MTL::SamplerState* SampleStateForClampFlags(hsGMatState::hsGMatClampFlags sampleState) const; - NS::UInteger CurrentTargetSampleCount() + NS::UInteger CurrentTargetSampleCount() const { if (fCurrentRenderTarget) { return 1; @@ -185,7 +185,7 @@ class plMetalDevice void EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, float sigma); - MTL::PixelFormat GetFramebufferFormat() { return fFramebufferFormat; }; + MTL::PixelFormat GetFramebufferFormat() const { return fFramebufferFormat; }; private: struct plMetalPipelineRecord diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h index 10a1ccbcae..c0b9c9ac1a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalFragmentShader.h @@ -58,7 +58,7 @@ class plMetalFragmentShader : public plMetalShader void Release() override; void Link(plMetalFragmentShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } - 
plMetalFragmentShader* const GetNext() { return (plMetalFragmentShader*)fNext; } + plMetalFragmentShader* const GetNext() const { return (plMetalFragmentShader*)fNext; } }; #endif // plMetalFragmentShader_inc diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 4b0498e613..85269bf520 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -45,20 +45,22 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com #include "HeadSpin.h" #include "hsBitVector.h" #include "hsGMatState.inl" +#include "plPipeline.h" + +#include "plMetalDevice.h" +#include "plMetalPipeline.h" #include "plDrawable/plGBufferGroup.h" #include "plGImage/plCubicEnvironmap.h" #include "plGImage/plMipmap.h" -#include "plMetalDevice.h" -#include "plMetalPipeline.h" #include "plPipeDebugFlags.h" -#include "plPipeline.h" #include "plPipeline/plCubicRenderTarget.h" #include "plPipeline/plRenderTarget.h" #include "plSurface/hsGMaterial.h" #include "plSurface/plLayerInterface.h" #include +#include plMetalMaterialShaderRef::plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe) : fPipeline(pipe), fMaterial(mat), @@ -181,18 +183,20 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encode IHandleMaterial( GetPassIndex(pass), passDescription, &uniforms, piggyBacks, [this, &preEncodeTransform, &encoder, &pass, &vertexUniforms](plLayerInterface* layer, uint32_t index) { - layer = preEncodeTransform(layer, index); - IBuildLayerTexture(encoder, index, layer); + layer = preEncodeTransform(layer, index); + IBuildLayerTexture(encoder, index, layer); - plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); + plBitmap* img = plBitmap::ConvertNoRef(layer->GetTexture()); - assert(index - GetPassIndex(pass) >= 0); - EncodeTransform(layer, 
&vertexUniforms->uvTransforms[index]); + assert(index - GetPassIndex(pass) >= 0); + EncodeTransform(layer, &vertexUniforms->uvTransforms[index]); - return layer; - }, [&postEncodeTransform](plLayerInterface* layer, uint32_t index) { - return postEncodeTransform(layer, index); - }); + return layer; + }, + [&postEncodeTransform](plLayerInterface* layer, uint32_t index) { + return postEncodeTransform(layer, index); + } + ); encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), FragmentShaderArgumentUniforms); } @@ -256,10 +260,12 @@ void plMetalMaterialShaderRef::ILoopOverLayers() j = IHandleMaterial( currLayer, &passDescription, layerBuffer, nullptr, [](plLayerInterface* layer, uint32_t index) { - return layer; - }, [](plLayerInterface* layer, uint32_t index) { - return layer; - }); + return layer; + }, + [](plLayerInterface* layer, uint32_t index) { + return layer; + } + ); if (j == -1) break; @@ -420,6 +426,7 @@ uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, return -1; } + static_assert(std::is_trivial_v, "plMetalFragmentShaderDescription must be a POD type!"); memset(passDescription, 0, sizeof(plMetalFragmentShaderDescription)); // Ignoring the bit about ATI Radeon and UVW limits diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 0638cfe2c8..0ab3eddd66 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -69,7 +69,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef std::vector fPassArgumentBuffers; public: - void Link(plMetalMaterialShaderRef **back) { plMetalDeviceRef::Link((plMetalDeviceRef **)back); } + void Link(plMetalMaterialShaderRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalMaterialShaderRef* GetNext() const { return (plMetalMaterialShaderRef*)fNext; } 
plMetalMaterialShaderRef(hsGMaterial* mat, plMetalPipeline* pipe); @@ -83,23 +83,23 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef uint32_t GetPassIndex(size_t which) const { return fPassIndices[which]; } const std::vector GetLayersForPass(size_t pass) const { return fPasses[pass]; } - void EncodeArguments(MTL::RenderCommandEncoder *encoder, - VertexUniforms *vertexUniforms, + void EncodeArguments(MTL::RenderCommandEncoder* encoder, + VertexUniforms* vertexUniforms, uint pass, - plMetalFragmentShaderDescription *passDescription, - std::vector *piggyBacks, - std::function preEncodeTransform, - std::function postEncodeTransform); - void FastEncodeArguments(MTL::RenderCommandEncoder *encoder, VertexUniforms *vertexUniforms, uint pass); + plMetalFragmentShaderDescription* passDescription, + std::vector* piggyBacks, + std::function preEncodeTransform, + std::function postEncodeTransform); + void FastEncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass); // probably not a good idea to call prepareTextures directly // mostly just a hack to keep plates working for now - void prepareTextures(MTL::RenderCommandEncoder *encoder, uint pass); + void prepareTextures(MTL::RenderCommandEncoder* encoder, uint pass); std::vector fPassLengths; // Set the current Plasma state based on the input layer state and the material overrides. // fMatOverOn overrides to set a state bit whether it is set in the layer or not. 
// fMatOverOff overrides to clear a state bit whether it is set in the layer or not.s - const hsGMatState ICompositeLayerState(const plLayerInterface *layer) const; + const hsGMatState ICompositeLayerState(const plLayerInterface* layer) const; const struct plMetalFragmentShaderDescription GetFragmentShaderDescription(size_t which) const { @@ -110,7 +110,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void ILoopOverLayers(); uint32_t fNumPasses; - uint32_t IHandleMaterial(uint32_t layer, + uint32_t IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription* passDescription, plMetalFragmentShaderArgumentBuffer* uniforms, std::vector* piggybacks, @@ -121,7 +121,7 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef void IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); - std::vector> fPasses; + std::vector> fPasses; std::vector fFragmentShaderDescriptions; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index a89fa96a4b..8efe2606a5 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1616,7 +1616,7 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, numActivePiggyBacks = fActivePiggyBacks; } - struct plMetalFragmentShaderDescription fragmentShaderDescription; + plMetalFragmentShaderDescription fragmentShaderDescription; lay = IPopOverAllLayer(lay); lay = IPopOverBaseLayer(lay); @@ -1640,14 +1640,14 @@ bool plMetalPipeline::IHandleMaterialPass(hsGMaterial* material, uint32_t pass, return layer; }; - + auto postEncodeTransform = [this](plLayerInterface* layer, uint32_t index) { layer = IPopOverAllLayer(layer); if (index == 0) layer = IPopOverBaseLayer(layer); return layer; }; - + 
mRef->EncodeArguments(fDevice.CurrentRenderCommandEncoder(), fCurrentRenderPassUniforms, pass, @@ -2051,14 +2051,14 @@ void plMetalPipeline::ISetLayer(uint32_t lay) fCurrRenderLayer = lay; plCONST(int) kBiasMult = 8; - static float mult [[gnu::used]] = -8.0; - static float constBias [[gnu::used]] = -0.0; - static float max [[gnu::used]] = -0.00001; + static float mult [[gnu::used]] = -8.0f; + static float constBias [[gnu::used]] = -0.0f; + static float max [[gnu::used]] = -0.00001f; fDevice.CurrentRenderCommandEncoder()->setDepthBias(constBias, mult, max); } } else if (fCurrRenderLayer != 0) { fCurrRenderLayer = 0; - fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0, 0.0, 0.0); + fDevice.CurrentRenderCommandEncoder()->setDepthBias(0.0f, 0.0f, 0.0f); } } @@ -2069,7 +2069,7 @@ void plMetalPipeline::IHandleBlendMode(hsGMatState flags) // done GPU side - but the GPU can't write an error state on a CPU // side buffer. if (flags.fBlendFlags & hsGMatState::kBlendNoColor) { - flags.fBlendFlags |= 0x80000000; + flags.fBlendFlags |= hsGMatState::kBlendAlphaPremultiplied; } else { switch (flags.fBlendFlags & hsGMatState::kBlendMask) { case hsGMatState::kBlendDetail: @@ -2389,7 +2389,7 @@ void plMetalPipeline::IEnableLight(size_t i, plLightInfo* light) fLights.lampSources[i].spotProps = {falloff, gamma, phi}; } else { - fLights.lampSources[i].spotProps = {0.0, 0.0, 0.0}; + fLights.lampSources[i].spotProps = {0.0f, 0.0f, 0.0f}; } } else { IDisableLight(i); @@ -3877,12 +3877,12 @@ void plMetalPipeline::ISetShadowLightState(hsGMaterial* mat) fCurrentRenderPassUniforms->diffuseCol.r = fCurrentRenderPassUniforms->diffuseCol.g = fCurrentRenderPassUniforms->diffuseCol.b = 1.f; fCurrentRenderPassUniforms->diffuseCol.a = 1.f; - fCurrentRenderPassUniforms->diffuseSrc = 1.0; - fCurrentRenderPassUniforms->emissiveSrc = 1.0; - fCurrentRenderPassUniforms->emissiveCol = 0.0; - fCurrentRenderPassUniforms->specularSrc = 0.0; - fCurrentRenderPassUniforms->ambientSrc = 0.0; - 
fCurrentRenderPassUniforms->globalAmb = 0.0; + fCurrentRenderPassUniforms->diffuseSrc = 1.0f; + fCurrentRenderPassUniforms->emissiveSrc = 1.0f; + fCurrentRenderPassUniforms->emissiveCol = 0.0f; + fCurrentRenderPassUniforms->specularSrc = 0.0f; + fCurrentRenderPassUniforms->ambientSrc = 0.0f; + fCurrentRenderPassUniforms->globalAmb = 0.0f; } // IDisableLightsForShadow /////////////////////////////////////////////////////////// diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp index 9cd719ad22..1225f4c452 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.cpp @@ -127,10 +127,10 @@ void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescri if (fHasSkinIndices) { skinWeightOffset += sizeof(uint32_t); } - int normOffset = skinWeightOffset + (sizeof(float) * this->fNumWeights); + int normOffset = skinWeightOffset + (sizeof(float) * fNumWeights); int colorOffset = normOffset + sizeof(hsPoint3); int baseUvOffset = colorOffset + (sizeof(uint32_t) * 2); - int stride = baseUvOffset + sizeof(hsPoint3) * this->fNumUVs; + int stride = baseUvOffset + sizeof(hsPoint3) * fNumUVs; vertexDescriptor->attributes()->object(VertexAttributePosition)->setFormat(MTL::VertexFormatFloat3); vertexDescriptor->attributes()->object(VertexAttributePosition)->setBufferIndex(0); @@ -148,7 +148,7 @@ void plMetalRenderSpanPipelineState::ConfigureVertexDescriptor(MTL::VertexDescri vertexDescriptor->attributes()->object(VertexAttributeWeights)->setOffset(weightOneOffset); } - for (int i = 0; i < this->fNumUVs; i++) { + for (int i = 0; i < fNumUVs; i++) { vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setFormat(MTL::VertexFormatFloat3); vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setBufferIndex(0); 
vertexDescriptor->attributes()->object(VertexAttributeTexcoord + i)->setOffset(baseUvOffset + (i * sizeof(hsPoint3))); @@ -181,7 +181,6 @@ void plMetalRenderSpanPipelineState::ConfigureBlendMode(const uint32_t blendMode descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOne); } else { descriptor->setSourceRGBBlendFactor(MTL::BlendFactorOneMinusSourceAlpha); - ; } descriptor->setDestinationRGBBlendFactor(MTL::BlendFactorSourceAlpha); } else { @@ -328,7 +327,7 @@ void plMetalFragmentShaderDescription::PopulateTextureInfo(const plLayerInterfac bool plMetalMaterialPassPipelineState::IsEqual(const plMetalPipelineState& p) const { - return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fFragmentShaderDescription == this->fFragmentShaderDescription; + return plMetalRenderSpanPipelineState::IsEqual(p) && static_cast(&p)->fFragmentShaderDescription == fFragmentShaderDescription; } MTL::Function* plMetalRenderShadowPipelineState::GetVertexFunction(MTL::Library* library) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h index 46e879600a..0f5cf4114d 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipelineState.h @@ -75,7 +75,7 @@ class plMetalPipelineState void PrewarmRenderPipelineState(); bool operator==(const plMetalPipelineState& p) const { - if ((&p)->GetID() != this->GetID()) { + if ((&p)->GetID() != GetID()) { return false; } else { return IsEqual(p); @@ -97,10 +97,10 @@ class plMetalPipelineState protected: plMetalDevice* fDevice; virtual void GetFunctionConstants(MTL::FunctionConstantValues*) const = 0; - MTL::FunctionConstantValues* MakeFunctionConstants() + MTL::FunctionConstantValues* MakeFunctionConstants() const { MTL::FunctionConstantValues* constants = MTL::FunctionConstantValues::alloc()->init()->autorelease(); - this->GetFunctionConstants(constants); + 
GetFunctionConstants(constants); return constants; } }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h index 157ae6ce28..c934bf9467 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalShader.h @@ -57,7 +57,7 @@ class plMetalShader : public plMetalDeviceRef plShader* fOwner; plMetalPipeline* fPipe; MTL::Function* fFunction; - + virtual bool ISetConstants(plMetalPipeline* pipe) = 0; // On error, sets error string. public: diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h index 061dbfb5ee..7ae2e4b63f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalTextFont.h @@ -56,9 +56,9 @@ class plMetalDevice; class plMetalTextFontPipelineState : public plMetalPipelineState { public: - plMetalTextFontPipelineState(plMetalDevice* device) : plMetalPipelineState(device){}; + plMetalTextFontPipelineState(plMetalDevice* device) : plMetalPipelineState(device){} bool IsEqual(const plMetalPipelineState& p) const override; - uint16_t GetID() const override { return plMetalPipelineType::Text; }; + uint16_t GetID() const override { return plMetalPipelineType::Text; } plMetalPipelineState* Clone() override; const MTL::Function* GetVertexFunction(MTL::Library* library) override; const MTL::Function* GetFragmentFunction(MTL::Library* library) override; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h index 0f040ac832..41b39923d6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalVertexShader.h @@ -55,10 +55,10 @@ class plMetalVertexShader : public plMetalShader bool ISetConstants(plMetalPipeline* pipe) 
override; // On error, sets error string. plMetalVertexShader(plShader* owner); ~plMetalVertexShader() override; - + void Link(plMetalVertexShader** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalVertexShader* GetNext() const { return (plMetalVertexShader*)fNext; } - + void Release() override; }; From db3c6d4fe0e2a65b967a46f8b0b292d274dc157f Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 12 Nov 2023 17:19:43 -0800 Subject: [PATCH 157/165] Giving renderer direct access to the Metal layer and removing drawable callback (cherry picked from commit bcb7266f29fc71b136ce9c973fe41aac445ebfec) --- .../Plasma/Apps/plClient/Mac-Cocoa/main.mm | 48 +++++++++++-------- Sources/Plasma/CoreLib/HeadSpin.h | 3 ++ .../pfMetalPipeline/plMetalDevice.h | 5 ++ .../pfMetalPipeline/plMetalPipeline.cpp | 24 ++++++---- .../pfMetalPipeline/plMetalPipeline.h | 2 - 5 files changed, 51 insertions(+), 31 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index c859146d27..1f40d47656 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -174,6 +174,8 @@ @interface AppDelegate : NSWindowController GetPipeline(); - pipeline->currentDrawableCallback = [self] (MTL::Device* device) { - id< CAMetalDrawable > drawable; - id metalDevice = (__bridge id)device; - if (((CAMetalLayer *) _renderLayer).device != metalDevice) { - ((CAMetalLayer *) _renderLayer).device = metalDevice; - dispatch_async(dispatch_get_main_queue(), ^{ - [self updateWindowTitle]; - }); - } - drawable = [((CAMetalLayer *) _renderLayer) nextDrawable]; - CA::MetalDrawable * mtlDrawable = ( __bridge CA::MetalDrawable* ) drawable; - mtlDrawable->retain(); - return mtlDrawable; - }; if (!gClient) { exit(0); @@ -502,6 +490,7 @@ - (void)startClient - (void)updateWindowTitle { +#ifdef PLASMA_PIPELINE_METAL NSString *productTitle = [NSString stringWithSTString:plProduct::LongName()]; id 
device = ((CAMetalLayer *) self.window.contentView.layer).device; #ifdef HS_DEBUGGING @@ -562,6 +551,23 @@ - (NSApplicationPresentationOptions)window:(NSWindow*)window NSApplicationPresentationAutoHideMenuBar; } +- (void)observeValueForKeyPath:(NSString *)keyPath ofObject:(id)object change:(NSDictionary *)change context:(void *)context +{ + if (context == DeviceDidChangeContext) { + // this may not happen on the main queue + dispatch_async(dispatch_get_main_queue(), ^{ + [self updateWindowTitle]; + }); + } else { + [super observeValueForKeyPath:keyPath ofObject:object change:change context:context]; + } +} + +- (void)dealloc +{ + [_renderLayer removeObserver:self forKeyPath:@"device" context:DeviceDidChangeContext]; +} + @end void PumpMessageQueueProc() diff --git a/Sources/Plasma/CoreLib/HeadSpin.h b/Sources/Plasma/CoreLib/HeadSpin.h index ffafc38f65..6f0cf62804 100644 --- a/Sources/Plasma/CoreLib/HeadSpin.h +++ b/Sources/Plasma/CoreLib/HeadSpin.h @@ -73,6 +73,9 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com typedef HINSTANCE HMODULE; typedef long HRESULT; typedef void* HANDLE; +#elif HS_BUILD_FOR_MACOS + typedef void* hsWindowHndl; + typedef void* hsWindowInst; #else typedef int32_t* hsWindowHndl; typedef int32_t* hsWindowInst; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h index 1998fcc4dd..b7d493659b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.h @@ -209,6 +209,9 @@ class plMetalDevice std::mutex fPipelineCreationMtx; void StartPipelineBuild(plMetalPipelineRecord& record, std::condition_variable** condOut); std::condition_variable* PrewarmPipelineStateFor(plMetalPipelineState* pipelineState); + + void SetOutputLayer(CA::MetalLayer* layer) { fLayer = layer; } + CA::MetalLayer* GetOutputLayer() const { return fLayer; }; protected: plMetalLinkedPipeline* PipelineState(plMetalPipelineState* pipelineState); @@ -226,6 +229,8 @@ class plMetalDevice MTL::CommandBuffer* fCurrentCommandBuffer; MTL::CommandBuffer* fCurrentOffscreenCommandBuffer; MTL::RenderCommandEncoder* fCurrentRenderTargetCommandEncoder; + + CA::MetalLayer* fLayer; MTL::Texture* fCurrentDrawableDepthTexture; MTL::Texture* fCurrentFragmentOutputTexture; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 8efe2606a5..63bfecf1a7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -163,7 +163,6 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons fRenderTargetRefList(), fMatRefList(), fCurrentRenderPassUniforms(), - currentDrawableCallback(), fFragFunction(), fVShaderRefList(), fPShaderRefList(), @@ -180,6 +179,11 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons 
fDevice.fPipeline = this; fMaxLayersAtOnce = 8; + + fDevice.SetOutputLayer(static_cast(window)); + // For now - set this once at startup. If the underlying device is allowed to change on + // the fly (eGPU, display change, etc) - revisit. + fDevice.GetOutputLayer()->setDevice(fDevice.fMetalDevice); // Default our output format to 8 bit BGRA. Client may immediately change this to // the actual framebuffer format. @@ -623,9 +627,12 @@ bool plMetalPipeline::BeginRender() IPreprocessShadows(); IPreprocessAvatarTextures(); - CA::MetalDrawable* drawable = currentDrawableCallback(fDevice.fMetalDevice); + CA::MetalLayer* outputLayer = fDevice.GetOutputLayer(); + + CA::MetalDrawable* drawable = fDevice.GetOutputLayer()->nextDrawable()->retain(); if (!drawable) { - fCurrentPool->release(); + // no framebuffer available - abort + EndRender(); return true; } fDevice.CreateNewCommandBuffer(drawable); @@ -667,6 +674,7 @@ bool plMetalPipeline::EndRender() } } fCurrentPool->release(); + fCurrentPool = nullptr; return retVal; } @@ -981,9 +989,11 @@ void plMetalPipeline::GetSupportedDisplayModes(std::vector* res, */ std::vector supported; + CA::MetalLayer* layer = fDevice.GetOutputLayer(); + CGSize drawableSize = layer->drawableSize(); supported.emplace_back(); - supported[0].Width = 800; - supported[0].Height = 600; + supported[0].Width = drawableSize.width; + supported[0].Height = drawableSize.height; supported[0].ColorDepth = 32; *res = supported; @@ -1013,9 +1023,7 @@ int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth) void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync) { - // FIXME: What's this? - // Seems like an entry point for passing in display settings.
- + Resize(Width, Height); fDevice.SetMaxAnsiotropy(MaxAnisotropicSamples); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 4e393ef16d..6b51257a06 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -87,8 +87,6 @@ class plRenderPrimFunc class plMetalPipeline : public pl3DPipeline { public: - // The actual client should set this callback so we can retrieve drawables from the window server - std::function currentDrawableCallback; // caching the frag function here so that the shader compiler can quickly access it MTL::Function* fFragFunction; From 1570887af6ed42eed7d69ac22f90966cd072591c Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 12 Nov 2023 17:32:36 -0800 Subject: [PATCH 158/165] Indentation fixes for Obj-C pipeline files and Cyan header for shader --- .../FeatureLib/pfMetalPipeline/.clang-format | 31 ++++++ .../ShaderSrc/WaveDecEnv.metal | 47 +++++++-- .../plMetalDevicePerformanceShaders.mm | 96 ++++++++++--------- .../pfMetalPipeline/plMetalEnumerate.mm | 81 ++++++++-------- 4 files changed, 162 insertions(+), 93 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format index dcb3b2f163..76d0387f80 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format @@ -37,3 +37,34 @@ SpacesBeforeTrailingComments: 1 --- Language: ObjC # Obj-C specific settings go here +IndentWidth: 4 +AccessModifierOffset: -4 +ColumnLimit: 0 +BreakBeforeBraces: Custom +BraceWrapping: + AfterClass: true + AfterControlStatement: Never + AfterEnum: true + AfterFunction: true + AfterNamespace: true + AfterObjCDeclaration: true + AfterStruct: true + AfterUnion: true + BeforeCatch: false + BeforeElse: false + IndentBraces: false +IndentAccessModifiers: 
false +AlignConsecutiveDeclarations: AcrossComments +AlignConsecutiveAssignments: + Enabled: False +ReflowComments: true +LambdaBodyIndentation: OuterScope +AllowShortBlocksOnASingleLine: true +AllowShortFunctionsOnASingleLine: true +AllowShortIfStatementsOnASingleLine: true +PackConstructorInitializers: CurrentLine +ObjCBlockIndentWidth: 4 +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesBeforeTrailingComments: 1 diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index 6630e39565..f74fe431fe 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -1,9 +1,44 @@ -// -// WaveDecEnv.metal -// plGLClient -// -// Created by Colin Cornaby on 1/2/22. -// +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . 
+ +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. + 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ #include using namespace metal; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm index 2760a02ce2..666f1a7008 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevicePerformanceShaders.mm @@ -46,55 +46,57 @@ #include "plMetalDevice.h" void plMetalDevice::EncodeBlur(MTL::CommandBuffer* commandBuffer, MTL::Texture* texture, - float sigma) { - // FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more - // buffers Technically shadow blurring only happens at the end of the render pass though... 
- CurrentRenderCommandEncoder()->endEncoding(); - fCurrentRenderTargetCommandEncoder->release(); - fCurrentRenderTargetCommandEncoder = nil; + float sigma) +{ + // FIXME: Blurring currently ends a pass - and restarting a pass will possibly clear one or more + // buffers Technically shadow blurring only happens at the end of the render pass though... + CurrentRenderCommandEncoder()->endEncoding(); + fCurrentRenderTargetCommandEncoder->release(); + fCurrentRenderTargetCommandEncoder = nil; - // look up the shader by sigma value - MPSImageGaussianBlur* blur = (MPSImageGaussianBlur*)fBlurShaders[sigma]; + // look up the shader by sigma value + MPSImageGaussianBlur* blur = (MPSImageGaussianBlur*)fBlurShaders[sigma]; - // we don't have one, need to create one - if (!blur) { - blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id)fMetalDevice sigma:sigma]; - fBlurShaders[sigma] = (NS::Object*)blur; - } + // we don't have one, need to create one + if (!blur) { + blur = [[MPSImageGaussianBlur alloc] initWithDevice:(id)fMetalDevice + sigma:sigma]; + fBlurShaders[sigma] = (NS::Object*)blur; + } - // we'd like to do the blur in place, but Metal might not let us. - // if it allocates a new texture, we'll have to glit that data back to the original - id destTexture = (id)texture; - bool result = - [blur encodeToCommandBuffer:(id)commandBuffer - inPlaceTexture:(id*)&destTexture - fallbackCopyAllocator:^id( - MPSKernel* kernel, id commandBuffer, id texture) { - // this copy allocator will release the original texture - that texture is important, - // don't let it - [texture retain]; - MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor( - (MTL::PixelFormat)texture.pixelFormat, texture.width, texture.height, false); - descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); - return (id)fMetalDevice->newTexture(descriptor)->autorelease(); - }]; + // we'd like to do the blur in place, but Metal might not let us. 
+ // if it allocates a new texture, we'll have to glit that data back to the original + id destTexture = (id)texture; + bool result = + [blur encodeToCommandBuffer:(id)commandBuffer + inPlaceTexture:(id*)&destTexture + fallbackCopyAllocator:^id( + MPSKernel* kernel, id commandBuffer, id texture) { + // this copy allocator will release the original texture - that texture is + // important, don't let it + [texture retain]; + MTL::TextureDescriptor* descriptor = MTL::TextureDescriptor::texture2DDescriptor( + (MTL::PixelFormat)texture.pixelFormat, texture.width, texture.height, false); + descriptor->setUsage(MTL::TextureUsageShaderRead | MTL::TextureUsageShaderWrite); + return (id)fMetalDevice->newTexture(descriptor)->autorelease(); + }]; - // did Metal change our original texture? - if (destTexture != (id)texture) { - // we'll need to blit the dest texture back to the source - // we just committed a compute pass, buffer should be free for us to create - // a blit encoder - id blitEncoder = - [(id)GetCurrentCommandBuffer() blitCommandEncoder]; - [blitEncoder copyFromTexture:destTexture - sourceSlice:0 - sourceLevel:0 - sourceOrigin:MTLOriginMake(0, 0, 0) - sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 1) - toTexture:(id)texture - destinationSlice:0 - destinationLevel:0 - destinationOrigin:MTLOriginMake(0, 0, 0)]; - [blitEncoder endEncoding]; - } + // did Metal change our original texture? 
+ if (destTexture != (id)texture) { + // we'll need to blit the dest texture back to the source + // we just committed a compute pass, buffer should be free for us to create + // a blit encoder + id blitEncoder = + [(id)GetCurrentCommandBuffer() blitCommandEncoder]; + [blitEncoder copyFromTexture:destTexture + sourceSlice:0 + sourceLevel:0 + sourceOrigin:MTLOriginMake(0, 0, 0) + sourceSize:MTLSizeMake(destTexture.width, destTexture.height, 1) + toTexture:(id)texture + destinationSlice:0 + destinationLevel:0 + destinationOrigin:MTLOriginMake(0, 0, 0)]; + [blitEncoder endEncoding]; + } } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm index f67485491e..eaef5f157e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalEnumerate.mm @@ -50,45 +50,46 @@ #include #include "plMetalPipeline.h" -void plMetalEnumerate::Enumerate(std::vector& records) { - // For now - just use the default device. If there is a high power discrete device - this will - // spin it up. This will also automatically pin us to an eGPU if present and the user has - // configured us to use it. 
- MTL::Device* device = MTL::CreateSystemDefaultDevice(); - - if (device) { - hsG3DDeviceRecord devRec; - devRec.SetG3DDeviceType(hsG3DDeviceSelector::kDevTypeMetal); - devRec.SetDriverName("Metal"); - devRec.SetDeviceDesc(device->name()->utf8String()); - // Metal has ways to query capabilities, but doesn't expose a flat version - // Populate with the OS version - @autoreleasepool { - NSProcessInfo* processInfo = [NSProcessInfo processInfo]; - NSOperatingSystemVersion version = processInfo.operatingSystemVersion; - NSString* versionString = - [NSString stringWithFormat:@"%li.%li.%li", (long)version.majorVersion, - (long)version.minorVersion, version.patchVersion]; - devRec.SetDriverVersion([versionString cStringUsingEncoding:NSUTF8StringEncoding]); +void plMetalEnumerate::Enumerate(std::vector& records) +{ + // For now - just use the default device. If there is a high power discrete device - this will + // spin it up. This will also automatically pin us to an eGPU if present and the user has + // configured us to use it. 
+ MTL::Device* device = MTL::CreateSystemDefaultDevice(); + + if (device) { + hsG3DDeviceRecord devRec; + devRec.SetG3DDeviceType(hsG3DDeviceSelector::kDevTypeMetal); + devRec.SetDriverName("Metal"); + devRec.SetDeviceDesc(device->name()->utf8String()); + // Metal has ways to query capabilities, but doesn't expose a flat version + // Populate with the OS version + @autoreleasepool { + NSProcessInfo* processInfo = [NSProcessInfo processInfo]; + NSOperatingSystemVersion version = processInfo.operatingSystemVersion; + NSString* versionString = + [NSString stringWithFormat:@"%li.%li.%li", (long)version.majorVersion, + (long)version.minorVersion, version.patchVersion]; + devRec.SetDriverVersion([versionString cStringUsingEncoding:NSUTF8StringEncoding]); + } + devRec.SetDriverDesc(device->name()->utf8String()); + + devRec.SetCap(hsG3DDeviceSelector::kCapsMipmap); + devRec.SetCap(hsG3DDeviceSelector::kCapsPerspective); + devRec.SetCap(hsG3DDeviceSelector::kCapsCompressTextures); + devRec.SetCap(hsG3DDeviceSelector::kCapsDoesSmallTextures); + devRec.SetCap(hsG3DDeviceSelector::kCapsPixelShader); + devRec.SetCap(hsG3DDeviceSelector::kCapsHardware); + + devRec.SetLayersAtOnce(8); + + // Just make a fake mode so the device selector will let it through + hsG3DDeviceMode devMode; + devMode.SetWidth(hsG3DDeviceSelector::kDefaultWidth); + devMode.SetHeight(hsG3DDeviceSelector::kDefaultHeight); + devMode.SetColorDepth(hsG3DDeviceSelector::kDefaultDepth); + devRec.GetModes().emplace_back(devMode); + + records.emplace_back(devRec); } - devRec.SetDriverDesc(device->name()->utf8String()); - - devRec.SetCap(hsG3DDeviceSelector::kCapsMipmap); - devRec.SetCap(hsG3DDeviceSelector::kCapsPerspective); - devRec.SetCap(hsG3DDeviceSelector::kCapsCompressTextures); - devRec.SetCap(hsG3DDeviceSelector::kCapsDoesSmallTextures); - devRec.SetCap(hsG3DDeviceSelector::kCapsPixelShader); - devRec.SetCap(hsG3DDeviceSelector::kCapsHardware); - - devRec.SetLayersAtOnce(8); - - // Just make a fake mode so 
the device selector will let it through - hsG3DDeviceMode devMode; - devMode.SetWidth(hsG3DDeviceSelector::kDefaultWidth); - devMode.SetHeight(hsG3DDeviceSelector::kDefaultHeight); - devMode.SetColorDepth(hsG3DDeviceSelector::kDefaultDepth); - devRec.GetModes().emplace_back(devMode); - - records.emplace_back(devRec); - } } From 986e8d3405fdfd470d14dcecb5ec5858d7138531 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 12 Nov 2023 17:56:32 -0800 Subject: [PATCH 159/165] More cleanup --- .../pfMetalPipeline/plMetalPipeline.cpp | 102 +++++++++--------- .../PubUtilLib/plSurface/hsGMaterial.cpp | 6 +- 2 files changed, 53 insertions(+), 55 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 63bfecf1a7..4b36db789f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -1301,16 +1301,16 @@ void plMetalPipeline::IRenderProjection(const plRenderPrimFunc& render, plLightI IScaleLight(0, true); - fCurrentRenderPassUniforms->ambientSrc = 1.0; - fCurrentRenderPassUniforms->diffuseSrc = 1.0; - fCurrentRenderPassUniforms->emissiveSrc = 1.0; - fCurrentRenderPassUniforms->specularSrc = 1.0; - fCurrentRenderPassUniforms->globalAmb = {1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->emissiveCol = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->ambientSrc = 1; + fCurrentRenderPassUniforms->diffuseSrc = 1; + fCurrentRenderPassUniforms->emissiveSrc = 1; + fCurrentRenderPassUniforms->specularSrc = 1; + fCurrentRenderPassUniforms->globalAmb = {1.f, 1.f, 1.f}; + fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f}; + 
fCurrentRenderPassUniforms->emissiveCol = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->diffuseCol = {1.f, 1.f, 1.f, 1.f}; matrix_float4x4 tXfm; hsMatrix2SIMD(proj->GetTransform(), &tXfm); @@ -2112,18 +2112,18 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye // plProfile_Inc(MatLightState); if (IsDebugFlagSet(plPipeDbg::kFlagAllBright)) { - fCurrentRenderPassUniforms->globalAmb = {1.0, 1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->globalAmb = {1.f, 1.f, 1.f, 1.f}; - fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->diffuseCol = {1.0, 1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->emissiveCol = {1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->emissiveCol = {1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->specularCol = {1.0, 1.0, 1.0}; + fCurrentRenderPassUniforms->ambientCol = {1.f, 1.f, 1.f}; + fCurrentRenderPassUniforms->diffuseCol = {1.f, 1.f, 1.f, 1.f}; + fCurrentRenderPassUniforms->emissiveCol = {1.f, 1.f, 1.f}; + fCurrentRenderPassUniforms->emissiveCol = {1.f, 1.f, 1.f}; + fCurrentRenderPassUniforms->specularCol = {1.f, 1.f, 1.f}; - fCurrentRenderPassUniforms->ambientSrc = 1.0; - fCurrentRenderPassUniforms->diffuseSrc = 1.0; - fCurrentRenderPassUniforms->emissiveSrc = 1.0; - fCurrentRenderPassUniforms->specularSrc = 1.0; + fCurrentRenderPassUniforms->ambientSrc = 1; + fCurrentRenderPassUniforms->diffuseSrc = 1; + fCurrentRenderPassUniforms->emissiveSrc = 1; + fCurrentRenderPassUniforms->specularSrc = 1; return; } @@ -2143,14 +2143,14 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye case plSpan::kLiteMaterial: // Material shading { if (state.fShadeFlags & hsGMatState::kShadeWhite) { - fCurrentRenderPassUniforms->globalAmb = {1.0, 1.0, 1.0, 1.0}; - fCurrentRenderPassUniforms->ambientCol = {1.0, 1.0, 1.0}; + 
fCurrentRenderPassUniforms->globalAmb = {1.f, 1.f, 1.f, 1.f}; + fCurrentRenderPassUniforms->ambientCol = {1.f, 1.f, 1.f}; } else if (IsDebugFlagSet(plPipeDbg::kFlagNoPreShade)) { - fCurrentRenderPassUniforms->globalAmb = {0.0, 0.0, 0.0, 1.0}; - fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->globalAmb = {0.f, 0.f, 0.f, 1.f}; + fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f}; } else { hsColorRGBA amb = currLayer->GetPreshadeColor(); - fCurrentRenderPassUniforms->globalAmb = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.0}; + fCurrentRenderPassUniforms->globalAmb = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), 1.f}; fCurrentRenderPassUniforms->ambientCol = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b)}; } @@ -2168,17 +2168,17 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye mat.Power = currLayer->GetSpecularPower(); #endif } else { - fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f}; } - fCurrentRenderPassUniforms->diffuseSrc = 1.0; - fCurrentRenderPassUniforms->emissiveSrc = 1.0; - fCurrentRenderPassUniforms->specularSrc = 1.0; + fCurrentRenderPassUniforms->diffuseSrc = 1.f; + fCurrentRenderPassUniforms->emissiveSrc = 1.f; + fCurrentRenderPassUniforms->specularSrc = 1.f; if (state.fShadeFlags & hsGMatState::kShadeNoShade) { - fCurrentRenderPassUniforms->ambientSrc = 1.0; + fCurrentRenderPassUniforms->ambientSrc = 1.f; } else { - fCurrentRenderPassUniforms->ambientSrc = 0.0; + fCurrentRenderPassUniforms->ambientSrc = 0.f; } fCurrLightingMethod = plSpan::kLiteMaterial; @@ -2187,20 +2187,20 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye case plSpan::kLiteVtxPreshaded: // Vtx preshaded { - fCurrentRenderPassUniforms->globalAmb = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; - 
fCurrentRenderPassUniforms->diffuseCol = {0.0, 0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->emissiveCol = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->globalAmb = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->diffuseCol = {0.f, 0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->emissiveCol = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f}; - fCurrentRenderPassUniforms->diffuseSrc = 0.0; - fCurrentRenderPassUniforms->ambientSrc = 1.0; - fCurrentRenderPassUniforms->specularSrc = 1.0; + fCurrentRenderPassUniforms->diffuseSrc = 0.f; + fCurrentRenderPassUniforms->ambientSrc = 1.f; + fCurrentRenderPassUniforms->specularSrc = 1.f; if (state.fShadeFlags & hsGMatState::kShadeEmissive) { - fCurrentRenderPassUniforms->emissiveSrc = 0.0; + fCurrentRenderPassUniforms->emissiveSrc = 0.f; } else { - fCurrentRenderPassUniforms->emissiveSrc = 1.0; + fCurrentRenderPassUniforms->emissiveSrc = 1.f; } fCurrLightingMethod = plSpan::kLiteVtxPreshaded; @@ -2209,8 +2209,8 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye case plSpan::kLiteVtxNonPreshaded: // Vtx non-preshaded { - fCurrentRenderPassUniforms->ambientCol = {0.0, 0.0, 0.0}; - fCurrentRenderPassUniforms->diffuseCol = {0.0, 0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->ambientCol = {0.f, 0.f, 0.f}; + fCurrentRenderPassUniforms->diffuseCol = {0.f, 0.f, 0.f, 0.f}; hsColorRGBA em = currLayer->GetAmbientColor(); fCurrentRenderPassUniforms->emissiveCol = {static_cast(em.r), static_cast(em.g), static_cast(em.b)}; @@ -2223,16 +2223,16 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye mat.Power = currLayer->GetSpecularPower(); #endif } else { - fCurrentRenderPassUniforms->specularCol = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->specularCol = {0.f, 0.f, 0.f}; } hsColorRGBA amb = currLayer->GetPreshadeColor(); 
fCurrentRenderPassUniforms->globalAmb = {static_cast(amb.r), static_cast(amb.g), static_cast(amb.b), static_cast(amb.a)}; - fCurrentRenderPassUniforms->ambientSrc = 0.0; - fCurrentRenderPassUniforms->diffuseSrc = 0.0; - fCurrentRenderPassUniforms->emissiveSrc = 1.0; - fCurrentRenderPassUniforms->specularSrc = 1.0; + fCurrentRenderPassUniforms->ambientSrc = 0; + fCurrentRenderPassUniforms->diffuseSrc = 0; + fCurrentRenderPassUniforms->emissiveSrc = 1; + fCurrentRenderPassUniforms->specularSrc = 1; fCurrLightingMethod = plSpan::kLiteVtxNonPreshaded; break; @@ -2272,13 +2272,13 @@ void plMetalPipeline::ICalcLighting(plMetalMaterialShaderRef* mRef, const plLaye } default: fCurrentRenderPassUniforms->fogExponential = 0; - fCurrentRenderPassUniforms->fogValues = {0.0, 0.0}; - fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->fogValues = {0.f, 0.f}; + fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f}; break; } if (currLayer->GetBlendFlags() & (hsGMatState::kBlendAdd | hsGMatState::kBlendMADD | hsGMatState::kBlendAddColorTimesAlpha)) { - fCurrentRenderPassUniforms->fogColor = {0.0, 0.0, 0.0}; + fCurrentRenderPassUniforms->fogColor = {0.f, 0.f, 0.f}; } } diff --git a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp index bd2c6be948..b22bfb3866 100644 --- a/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp +++ b/Sources/Plasma/PubUtilLib/plSurface/hsGMaterial.cpp @@ -64,10 +64,8 @@ hsGMaterial::hsGMaterial() : fLOD(0), fCompFlags(0), fLoadFlags(0), -fLastUpdateTime(0) -#if PLASMA_PIPELINE_GL || PLASMA_PIPELINE_METAL -,fDeviceRef(nullptr) -#endif +fLastUpdateTime(0), +fDeviceRef() { } From 7b548f4f58d98b3b235d35c66ba687e1b0ee9874 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Wed, 15 Nov 2023 22:00:26 -0800 Subject: [PATCH 160/165] Fixes to fullscreen behavior on startup for Mac --- .../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp | 7 +++++-- 
.../Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp index 4b36db789f..59aebe384a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.cpp @@ -178,7 +178,9 @@ plMetalPipeline::plMetalPipeline(hsWindowHndl display, hsWindowHndl window, cons fCurrLayerIdx = 0; fDevice.fPipeline = this; - fMaxLayersAtOnce = 8; + fMaxLayersAtOnce = devMode->GetDevice()->GetLayersAtOnce(); + + fIsFullscreen = !fInitialPipeParams.Windowed; fDevice.SetOutputLayer(static_cast(window)); // For now - set this once at startup. If the underlying device is allow to change on @@ -716,7 +718,7 @@ void plMetalPipeline::RenderScreenElements() plProfile_EndTiming(Reset); } -bool plMetalPipeline::IsFullScreen() const { return !fDefaultPipeParams.Windowed; } +bool plMetalPipeline::IsFullScreen() const { return fIsFullscreen; } void plMetalPipeline::Resize(uint32_t width, uint32_t height) { @@ -1023,6 +1025,7 @@ int plMetalPipeline::GetMaxAntiAlias(int Width, int Height, int ColorDepth) void plMetalPipeline::ResetDisplayDevice(int Width, int Height, int ColorDepth, bool Windowed, int NumAASamples, int MaxAnisotropicSamples, bool vSync) { + fIsFullscreen = !Windowed; Resize(Width, Height); fDevice.SetMaxAnsiotropy(MaxAnisotropicSamples); } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 6b51257a06..2d9541780f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -168,6 +168,8 @@ class plMetalPipeline : public pl3DPipeline private: VertexUniforms* fCurrentRenderPassUniforms; + + bool fIsFullscreen; void FindFragFunction(); From 
f3d976498ae50932447060b4781148b7c2374cb9 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 17 Nov 2023 21:40:56 -0800 Subject: [PATCH 161/165] Removing setting of NSWindow for client window --- Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index 1f40d47656..a43e3f8673 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -197,6 +197,7 @@ - (id)init [window setDelegate:self]; gClient.SetClientWindow((__bridge void *)view.layer); + gClient.SetClientDisplay((hsWindowHndl)NULL); self = [super initWithWindow:window]; self.window.acceptsMouseMovedEvents = YES; @@ -469,9 +470,6 @@ - (void)startClient forKeyPath:@"device" options:NSKeyValueObservingOptionNew | NSKeyValueObservingOptionInitial context:DeviceDidChangeContext]; - - gClient.SetClientWindow((hsWindowHndl)(__bridge void*)self.window); - gClient.SetClientDisplay((hsWindowHndl)NULL); if (!gClient) { exit(0); From e34919fb2edf1bc6f7cb25b029cbe419ba77d2dd Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Fri, 17 Nov 2023 21:42:41 -0800 Subject: [PATCH 162/165] Using stringWithSTString to set window title --- Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm index a43e3f8673..0101728f20 100644 --- a/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm +++ b/Sources/Plasma/Apps/plClient/Mac-Cocoa/main.mm @@ -505,7 +505,7 @@ - (void)updateWindowTitle #endif #else - [self.window setTitle:[NSString stringWithCString:plProduct::LongName().c_str() encoding:NSUTF8StringEncoding]]; + [NSString stringWithSTString:plProduct::LongName()]; #endif } From 0df34e5a47942cbce7e5ef7930454c86a4183320 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sat, 
25 Nov 2023 13:45:30 -0800 Subject: [PATCH 163/165] Fixing pink screen at start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The render command encoder is created lazily - and clear is not a draw call in Metal. A clear-only pass meant no render was being encoded for the command buffer. The render command encoder management could use a bit of cleaning - but for now manually forcing the lazy creation of a command encoder. This could probably be moved to a non-lazy model - but it’s lazy right now because we’re holding for a clear command. --- .../pfMetalPipeline/plMetalDevice.cpp | 32 +++++++++++++++---- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp index ce452f2eef..1166cefc4e 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDevice.cpp @@ -226,15 +226,18 @@ void plMetalDevice::ReleaseSamplerStates() void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool shouldClearDepth, float clearDepth) { - // Plasma may clear a target and draw at different times. - // This is specifically trouble with the drawable clear - // Plasma might clear the drawable, and then go off and do - // off screen stuff. Metal doesn't work that way, we need to - // draw and clear at the same time. So if it's a clear for the - // current drawable, remember that and perform the clear when - // we're actually drawing to screen. + /* + In Metal, a clear is an argument to the drawable loading operation, + not an operation that can be done freely at any time. So let's handle + a clear two ways: + 1) If we're in the middle of a rendering pass, manually clear. + 2) If we're at the beginning of a render pass, note the clear color + we should use to clear the framebuffer at load. 
+ */ if (fCurrentRenderTargetCommandEncoder) { + // We're mid flight, we'll need to manually paint the clear color + half4 clearColor; clearColor[0] = clearColor.r; clearColor[1] = clearColor.g; @@ -259,6 +262,9 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh CurrentRenderCommandEncoder()->setFragmentBytes(&clearDepth, sizeof(float), 1); CurrentRenderCommandEncoder()->drawPrimitives(MTL::PrimitiveTypeTriangleStrip, NS::UInteger(0), NS::UInteger(4)); } else { + // Render has not started yet! Note which clear color we should use + // for clearing the render buffer when we load it. + if (shouldClearColor) { if (fCurrentRenderTarget) { fClearRenderTargetColor = clearColor; @@ -274,6 +280,18 @@ void plMetalDevice::Clear(bool shouldClearColor, simd_float4 clearColor, bool sh } } } + + /* + Clear needs to count as a render operation, but Metal treats + it as an argument when starting a new render encoder. If a + render pass only cleared, but never rendered any content, + the clear would never happen because no render encoder would + be created. + + Force render encoder creation to force the clear to happen. 
+ */ + + CurrentRenderCommandEncoder(); } } From 6c96be0cb394b0a716032f99a03a0ea0d1d1652a Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Sun, 26 Nov 2023 20:47:33 -0800 Subject: [PATCH 164/165] Review feedback updates --- .../FeatureLib/pfMetalPipeline/CMakeLists.txt | 8 +- .../pfMetalPipeline/ShaderSrc/Avatar.metal | 16 +-- .../ShaderSrc/BiasNormals.metal | 45 ++++---- .../ShaderSrc/CompCosines.metal | 47 ++++---- .../ShaderSrc/FixedPipelineShaders.metal | 105 +++++++++--------- .../ShaderSrc/GammaCorrection.metal | 16 ++- .../pfMetalPipeline/ShaderSrc/Grass.metal | 73 +++++++++--- .../ShaderSrc/PlateShaders.metal | 25 +++-- .../ShaderSrc/TextFontShader.metal | 10 +- .../ShaderSrc/WaveDec1Lay_7.metal | 16 ++- .../ShaderSrc/WaveDecEnv.metal | 18 +-- .../pfMetalPipeline/ShaderSrc/WaveRip.metal | 16 ++- .../pfMetalPipeline/ShaderSrc/WaveSet7.metal | 18 +-- .../pfMetalPipeline/plMetalDeviceRef.h | 2 +- .../plMetalMaterialShaderRef.cpp | 15 +-- .../plMetalMaterialShaderRef.h | 14 +-- .../pfMetalPipeline/plMetalPipeline.h | 12 +- .../pfMetalPipeline/plMetalPlateManager.cpp | 2 +- 18 files changed, 262 insertions(+), 196 deletions(-) diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt index 194097c557..2fc73c1de7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/CMakeLists.txt @@ -1,9 +1,9 @@ include(FetchContent) FetchContent_Declare( - metalcpp - URL_HASH_SHA256 0afd87ca851465191ae4e3980aa036c7e9e02fe32e7c760ac1a74244aae6023b - URL "https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip" + metalcpp + URL_HASH_SHA256 0afd87ca851465191ae4e3980aa036c7e9e02fe32e7c760ac1a74244aae6023b + URL "https://developer.apple.com/metal/cpp/files/metal-cpp_macOS13.3_iOS16.4.zip" ) FetchContent_MakeAvailable(metalcpp) @@ -59,7 +59,7 @@ target_include_directories(pfMetalPipeline PUBLIC "ShaderSrc") source_group("Source 
Files" FILES ${pfMetalPipeline_SOURCES}) source_group("Header Files" FILES ${pfMetalPipeline_HEADERS}) -add_library( pfMetalPipelineShaders INTERFACE ) +add_library(pfMetalPipelineShaders INTERFACE) set(pfMetalPipeline_SHADERS ShaderSrc/FixedPipelineShaders.metal ShaderSrc/PlateShaders.metal diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal index f6626e2e03..e828db9f79 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Avatar.metal @@ -44,7 +44,8 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com using namespace metal; -typedef struct { +typedef struct +{ float4 position [[position]]; float2 uvPosition; half4 color; @@ -56,20 +57,21 @@ typedef struct float2 uvPostion [[attribute(1)]]; } PreprocessAvatarVertex; -vertex PreprocessAvatarTexturesInOut PreprocessAvatarVertexShader(PreprocessAvatarVertex in [[stage_in]]) { +vertex PreprocessAvatarTexturesInOut PreprocessAvatarVertexShader(PreprocessAvatarVertex in [[stage_in]]) +{ return { float4(in.position.x, in.position.y, 0.0, 1.0 ), in.uvPostion }; } -fragment half4 PreprocessAvatarFragmentShader(PreprocessAvatarTexturesInOut in [[stage_in]], - texture2d layer [[ texture(0) ]], - constant float4& blendColor [[ buffer(0 )]]) +fragment half4 PreprocessAvatarFragmentShader(PreprocessAvatarTexturesInOut in [[stage_in]], + texture2d layer [[ texture(0) ]], + constant float4& blendColor [[ buffer(0 )]]) { constexpr sampler colorSampler(mip_filter::linear, mag_filter::linear, min_filter::linear, address::clamp_to_zero); - + half4 colorSample = layer.sample(colorSampler, in.uvPosition.xy) * half4(blendColor); - + return colorSample; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal index c9421562a3..bb055a7feb 100644 --- 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/BiasNormals.metal @@ -45,7 +45,8 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct +{ float4 TexU0; float4 TexV0; @@ -57,7 +58,8 @@ typedef struct { float4 ScaleBias; } vs_BiasNormalsUniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; float4 texCoord0; float4 texCoord1; @@ -66,25 +68,22 @@ typedef struct { float4 color2; } vs_BiasNormalsOut; -vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[stage_in]], - constant vs_BiasNormalsUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[ stage_in ]], + constant vs_BiasNormalsUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ vs_BiasNormalsOut out; out.position = float4(in.position, 1.0); - out.texCoord0 = float4( - dot(float4(in.texCoord1, 1.0), uniforms.TexU0), + out.texCoord0 = float4(dot(float4(in.texCoord1, 1.0), uniforms.TexU0), dot(float4(in.texCoord1, 1.0), uniforms.TexV0), - 0, - 1 - ); + 0.f, + 1.f); - out.texCoord1 = float4( - dot(float4(in.texCoord1, 1.0), uniforms.TexU1), + out.texCoord1 = float4(dot(float4(in.texCoord1, 1.0), uniforms.TexU1), dot(float4(in.texCoord1, 1.0), uniforms.TexV1), - 0, - 1 - ); + 0.f, + 1.f); out.color1 = uniforms.ScaleBias.xxzz; out.color2 = uniforms.ScaleBias.yyzz; @@ -92,9 +91,10 @@ vertex vs_BiasNormalsOut vs_BiasNormals(Vertex in [[stage_in]], return out; } -fragment float4 ps_BiasNormals(vs_BiasNormalsOut in [[stage_in]], - texture2d t0 [[ texture(0) ]], - texture2d t1 [[ texture(1) ]]) { +fragment float4 ps_BiasNormals(vs_BiasNormalsOut in [[ stage_in ]], + texture2d t0 [[ texture(0) ]], + texture2d t1 [[ texture(1) ]]) +{ // Composite the cosines together. // Input map is cosine(pix) for each of // the 4 waves. 
@@ -111,13 +111,14 @@ fragment float4 ps_BiasNormals(vs_BiasNormalsOut in [[stage_in]], // Note also the c4 used for biasing back at the end. constexpr sampler colorSampler = sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::repeat); + mag_filter::linear, + min_filter::linear, + address::repeat); + float4 sample1 = t0.sample(colorSampler, in.texCoord0.xy); float4 sample2 = t1.sample(colorSampler, in.texCoord0.xy); - float4 out = float4(sample1.rgb - 0.5 + sample2.rgb - 0.5, - sample1.a + sample2.a); + float4 out = float4(sample1.rgb - 0.5 + sample2.rgb - 0.5, sample1.a + sample2.a); out.rgb = (out.rgb * in.color1.rgb) + in.color2.rgb; + return out; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal index aedd304976..a3620fc26f 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/CompCosines.metal @@ -45,7 +45,8 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct +{ float4 c0; float4 c1; float4 c2; @@ -53,7 +54,8 @@ typedef struct { float4 c4; } vs_CompCosinesUniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; float4 texCoord0; float4 texCoord1; @@ -61,8 +63,9 @@ typedef struct { float4 texCoord3; } vs_CompCosinesnInOut; -vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[stage_in]], - constant vs_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[ stage_in ]], + constant vs_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ vs_CompCosinesnInOut out; out.position = float4(in.position, 1.0); @@ -80,7 +83,8 @@ vertex vs_CompCosinesnInOut vs_CompCosines(Vertex in [[stage_in]], return out; } -typedef struct { +typedef struct +{ float4 c0; float4 c1; 
float4 c2; @@ -89,12 +93,13 @@ typedef struct { float4 c5; } ps_CompCosinesUniforms; -fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[stage_in]], - texture2d t0 [[ texture(0) ]], - texture2d t1 [[ texture(1) ]], - texture2d t2 [[ texture(2) ]], - texture2d t3 [[ texture(3) ]], - constant ps_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[ stage_in ]], + texture2d t0 [[ texture(0) ]], + texture2d t1 [[ texture(1) ]], + texture2d t2 [[ texture(2) ]], + texture2d t3 [[ texture(3) ]], + constant ps_CompCosinesUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ // Composite the cosines together. // Input map is cosine(pix) for each of // the 4 waves. @@ -111,18 +116,18 @@ fragment float4 ps_CompCosines(vs_CompCosinesnInOut in [[stage_in]], // Note also the c4 used for biasing back at the end. constexpr sampler colorSampler = sampler(mip_filter::linear, - mag_filter::linear, - min_filter::linear, - address::repeat); - - float4 out = 2 * (t0.sample(colorSampler, fract(in.texCoord0.xy)) - 0.5) * uniforms.c0; - out += 2 * (t1.sample(colorSampler, fract(in.texCoord1.xy)) - 0.5) * uniforms.c1; - out += 2 * (t2.sample(colorSampler, fract(in.texCoord2.xy)) - 0.5) * uniforms.c2; - out += 2 * (t3.sample(colorSampler, fract(in.texCoord3.xy)) - 0.5) * uniforms.c3; + mag_filter::linear, + min_filter::linear, + address::repeat); + + float4 out = 2.f * (t0.sample(colorSampler, fract(in.texCoord0.xy)) - 0.5f) * uniforms.c0; + out += 2.f * (t1.sample(colorSampler, fract(in.texCoord1.xy)) - 0.5f) * uniforms.c1; + out += 2.f * (t2.sample(colorSampler, fract(in.texCoord2.xy)) - 0.5f) * uniforms.c2; + out += 2.f * (t3.sample(colorSampler, fract(in.texCoord3.xy)) - 0.5f) * uniforms.c3; // Now bias it back into range [0..1] for output. 
out *= uniforms.c4; out += uniforms.c5; - out.b = 1.0; - out.a = 1.0; + out.b = 1.f; + out.a = 1.f; return out; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal index 3707a237ff..6b9bbc8884 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/FixedPipelineShaders.metal @@ -58,7 +58,8 @@ using namespace metal; #include "hsGMatStateEnums.h" -enum plUVWSrcModifiers: uint32_t{ +enum plUVWSrcModifiers: uint32_t +{ kUVWPassThru = 0x00000000, kUVWIdxMask = 0x0000ffff, kUVWNormal = 0x00010000, @@ -119,7 +120,8 @@ constant const bool hasCubicTexture6 = (sourceType6 == PassTypeCubicTexture && h constant const bool hasCubicTexture7 = (sourceType7 == PassTypeCubicTexture && hasLayer7); constant const bool hasCubicTexture8 = (sourceType8 == PassTypeCubicTexture && hasLayer8); -struct FragmentShaderArguments { +struct FragmentShaderArguments +{ texture2d textures [[ texture(FragmentShaderArgumentAttributeTextures), function_constant(has2DTexture1) ]]; texture2d texture2 [[ texture(FragmentShaderArgumentAttributeTextures + 1), function_constant(has2DTexture2) ]]; texture2d texture3 [[ texture(FragmentShaderArgumentAttributeTextures + 2), function_constant(has2DTexture3) ]]; @@ -172,27 +174,27 @@ typedef struct } ShadowCasterInOut; vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]], - constant plMetalLights & lights [[ buffer(VertexShaderArgumentLights) ]], - constant float4x4 & blendMatrix1 [[ buffer(VertexShaderArgumentBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) + constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]], + constant plMetalLights & lights [[ buffer(VertexShaderArgumentLights) ]], + constant 
float4x4 & blendMatrix1 [[ buffer(VertexShaderArgumentBlendMatrix1), function_constant(temp_hasOnlyWeight1) ]]) { ColorInOut out; // we should have been able to swizzle, but it didn't work in Xcode beta? Try again later. - const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.0f); + const half4 inColor = half4(in.color.b, in.color.g, in.color.r, in.color.a) / half4(255.f); const half3 MAmbient = mix(inColor.rgb, uniforms.ambientCol, uniforms.ambientSrc); const half4 MDiffuse = mix(inColor, uniforms.diffuseCol, uniforms.diffuseSrc); const half3 MEmissive = mix(inColor.rgb, uniforms.emissiveCol, uniforms.emissiveSrc); - half3 LAmbient = half3(0.0, 0.0, 0.0); - half3 LDiffuse = half3(0.0, 0.0, 0.0); + half3 LAmbient = half3(0.h, 0.h, 0.h); + half3 LDiffuse = half3(0.h, 0.h, 0.h); - const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; + const float3 Ndirection = normalize(float4(in.normal, 0.f) * uniforms.localToWorldMatrix).xyz; - float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); + float4 position = float4(in.position, 1.f) * uniforms.localToWorldMatrix; if (temp_hasOnlyWeight1) { - const float4 position2 = blendMatrix1 * float4(in.position, 1.0); - position = (in.weight1 * position) + ((1.0f - in.weight1) * position2); + const float4 position2 = blendMatrix1 * float4(in.position, 1.f); + position = (in.weight1 * position) + ((1.f - in.weight1) * position2); } for (size_t i = 0; i < lights.count; i++) { @@ -203,18 +205,18 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], // direction.w is attenuation float4 direction; - if (lightSource->position.w == 0.0) { + if (lightSource->position.w == 0.f) { // Directional Light with no attenuation - direction = float4(-(lightSource->direction).xyz, 1.0); + direction = float4(-(lightSource->direction).xyz, 1.f); } else { // Omni Light in all directions const float3 v2l = lightSource->position.xyz - position.xyz; 
const float distance = length(v2l); direction.xyz = normalize(v2l); - direction.w = 1.0 / (lightSource->constAtten + lightSource->linAtten * distance + lightSource->quadAtten * pow(distance, 2.0)); + direction.w = 1.f / (lightSource->constAtten + lightSource->linAtten * distance + lightSource->quadAtten * pow(distance, 2.f)); - if (lightSource->spotProps.x > 0.0) { + if (lightSource->spotProps.x > 0.f) { // Spot Light with cone falloff const float theta = dot(direction.xyz, normalize(-lightSource->direction).xyz); // inner cutoff @@ -222,7 +224,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], // outer cutoff const float phi = lightSource->spotProps.z; const float epsilon = (gamma - phi); - const float intensity = clamp((theta - phi) / epsilon, 0.0, 1.0); + const float intensity = clamp((theta - phi) / epsilon, 0.f, 1.f); direction.w *= pow(intensity, lightSource->spotProps.x); } @@ -230,12 +232,12 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], LAmbient.rgb = LAmbient.rgb + half3(direction.w * (lightSource->ambient.rgb * lightSource->scale)); const float3 dotResult = dot(Ndirection, direction.xyz); - LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (lightSource->diffuse.rgb * lightSource->scale) * half3(max(0.0, dotResult) * direction.w); + LDiffuse.rgb = LDiffuse.rgb + MDiffuse.rgb * (lightSource->diffuse.rgb * lightSource->scale) * half3(max(0.f, dotResult) * direction.w); } - const half3 ambient = (MAmbient.rgb) * clamp(uniforms.globalAmb.rgb + LAmbient.rgb, 0.0, 1.0); - const half3 diffuse = clamp(LDiffuse.rgb, 0.0, 1.0); - const half4 material = half4(clamp(ambient + diffuse + MEmissive.rgb, 0.0, 1.0), + const half3 ambient = (MAmbient.rgb) * clamp(uniforms.globalAmb.rgb + LAmbient.rgb, 0.h, 1.h); + const half3 diffuse = clamp(LDiffuse.rgb, 0.h, 1.h); + const half4 material = half4(clamp(ambient + diffuse + MEmissive.rgb, 0.h, 1.h), abs(uniforms.invVtxAlpha - MDiffuse.a)); out.vtxColor = half4(material.rgb, 
abs(uniforms.invVtxAlpha - MDiffuse.a)); @@ -244,7 +246,7 @@ vertex ColorInOut pipelineVertexShader(Vertex in [[stage_in]], // Fog out.fogColor = uniforms.calcFog(vCamPosition); - const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; + const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.f)) * uniforms.worldToCameraMatrix; for (size_t layer=0; layer 0) { resultColor.a = exp(-pow(fogValues.y * length(camPosition), fogValues.x)); @@ -392,7 +394,8 @@ half4 VertexUniforms::calcFog(float4 camPosition) constant { return resultColor; } -half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const { +half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 vertexColor, const uint8_t passType, float3 sampleCoord) const +{ if (passType == PassTypeColor) { return vertexColor; } else { @@ -406,7 +409,7 @@ half4 FragmentShaderArguments::sampleLayer(const size_t index, const half4 verte } else if (passType == PassTypeCubicTexture) { return (&cubicTextures)[index].sample((&samplers)[index], sampleCoord.xyz); } else { - return half4(0); + return half4(0.h); } } } @@ -454,7 +457,8 @@ fragment half4 pipelineFragmentShader(ColorInOut in [[stage_in]], return currentColor; } -constexpr void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { +constexpr void blendFirst(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) +{ // Local variable to store the color value if (blendFlags & kBlendInvertColor) { srcSample.rgb = 1.0h - srcSample.rgb; @@ -476,7 +480,8 @@ constexpr void blendFirst(half4 srcSample, thread half4 &destSample, const uint3 } } -constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) { +constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t blendFlags) +{ // Local variable to store the color value 
if (blendFlags & kBlendInvertColor) { srcSample.rgb = 1.0h - srcSample.rgb; @@ -569,8 +574,8 @@ constexpr void blend(half4 srcSample, thread half4 &destSample, const uint32_t b } } -vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]]) +vertex ShadowCasterInOut shadowVertexShader(Vertex in [[stage_in]], + constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]]) { ShadowCasterInOut out; @@ -605,14 +610,14 @@ fragment half4 shadowFragmentShader(ShadowCasterInOut in [[stage_in]]) property. There is no need to push an entirely new light table. */ -vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]], - constant plShadowState & shadowState [[ buffer(VertexShaderArgumentShadowState) ]]) +vertex ColorInOut shadowCastVertexShader(Vertex in [[ stage_in ]], + constant VertexUniforms & uniforms [[ buffer( VertexShaderArgumentFixedFunctionUniforms) ]], + constant plShadowState & shadowState [[ buffer(VertexShaderArgumentShadowState) ]]) { ColorInOut out; - float4 position = (float4(in.position, 1.0) * uniforms.localToWorldMatrix); - const float3 Ndirection = normalize(float4(in.normal, 0.0) * uniforms.localToWorldMatrix).xyz; + float4 position = (float4(in.position, 1.f) * uniforms.localToWorldMatrix); + const float3 Ndirection = normalize(float4(in.normal, 0.f) * uniforms.localToWorldMatrix).xyz; // Shadow casting uses the diffuse material color to control opacity const half4 MDiffuse = uniforms.diffuseCol; @@ -621,16 +626,16 @@ vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], if (shadowState.directional == true) { // Directional Light with no attenuation - direction = float4(-(shadowState.lightDirection).xyz, 1.0); + direction = float4(-(shadowState.lightDirection).xyz, 1.f); } else { // Omni Light in all 
directions const float3 v2l = shadowState.lightPosition.xyz - position.xyz; direction.xyz = normalize(v2l); - direction.w = 1.0; + direction.w = 1.f; } const float3 dotResult = dot(Ndirection, direction.xyz); - const half3 diffuse = MDiffuse.rgb * half3(max(0.0, dotResult)) * shadowState.power; + const half3 diffuse = MDiffuse.rgb * half3(max(0.h, dotResult)) * shadowState.power; out.vtxColor = half4(diffuse, 1.f); const float4 vCamPosition = position * uniforms.worldToCameraMatrix; @@ -638,7 +643,7 @@ vertex ColorInOut shadowCastVertexShader(Vertex in [[stage_in]], // Fog out.fogColor = uniforms.calcFog(vCamPosition); - const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.0)) * uniforms.worldToCameraMatrix; + const float4 normal = (uniforms.localToWorldMatrix * float4(in.normal, 0.f)) * uniforms.worldToCameraMatrix; for (size_t layer=0; layer texture [[ texture(16) ]], - constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(FragmentShaderArgumentShadowCastUniforms) ]], + texture2d texture [[ texture(16) ]], + constant plMetalShadowCastFragmentShaderArgumentBuffer & fragmentUniforms [[ buffer(FragmentShaderArgumentShadowCastUniforms) ]], FragmentShaderArguments layers, - constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowCastAlphaSrc) ]]) + constant int & alphaSrc [[ buffer(FragmentShaderArgumentShadowCastAlphaSrc) ]]) { float3 sampleCoords = in.texCoord1; if (fragmentUniforms.pointLightCast) { @@ -675,7 +680,7 @@ fragment half4 shadowCastFragmentShader(ColorInOut in [[stage_in]], // only possible alpha sources are layers 0 or 1 if (alphaSrc == 0 && passCount > 0) { - half4 layerColor = layers.sampleLayer(0, in.vtxColor,sourceTypes[0], in.texCoord3); + half4 layerColor = layers.sampleLayer(0.h, in.vtxColor,sourceTypes[0], in.texCoord3); currentColor.rgb *= layerColor.a; currentColor.rgb *= in.vtxColor.a; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal 
b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal index 63000b5e69..bc1be0ff26 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/GammaCorrection.metal @@ -55,23 +55,21 @@ struct GammaVertexOut float2 texturePosition; }; -vertex GammaVertexOut gammaCorrectVertex(constant GammaVertexIn *in [[ buffer(0) ]], - uint vertexID [[ vertex_id ]]) +vertex GammaVertexOut gammaCorrectVertex(constant GammaVertexIn *in [[ buffer(0) ]], + uint vertexID [[ vertex_id ]]) { GammaVertexOut out; // Just pass the position through. We're clearing in NDC space. - out.position = float4(in[vertexID].position, 0.5, 1.0); + out.position = float4(in[vertexID].position, 0.5f, 1.f); out.texturePosition = float2(in[vertexID].texturePosition); return out; } -const constant sampler lutSampler = sampler( - filter::nearest - ); +const constant sampler lutSampler = sampler(filter::nearest); -fragment half4 gammaCorrectFragment(GammaVertexOut in [[stage_in]], - texture2d inputTexture [[texture(0)]], - texture1d_array LUT [[texture(1)]]) +fragment half4 gammaCorrectFragment(GammaVertexOut in [[stage_in]], + texture2d inputTexture [[texture(0)]], + texture1d_array LUT [[texture(1)]]) { float4 color = inputTexture.read(ushort2(in.position.xy)); return { diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal index f5e5470b13..888ba89d76 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/Grass.metal @@ -1,9 +1,44 @@ -// -// GrassShader.metal -// plGLClient -// -// Created by Colin Cornaby on 1/1/22. -// +/*==LICENSE==* + +CyanWorlds.com Engine - MMOG client, server and tools +Copyright (C) 2011 Cyan Worlds, Inc. 
+ +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program. If not, see . + +Additional permissions under GNU GPL version 3 section 7 + +If you modify this Program, or any covered work, by linking or +combining it with any of RAD Game Tools Bink SDK, Autodesk 3ds Max SDK, +NVIDIA PhysX SDK, Microsoft DirectX SDK, OpenSSL library, Independent +JPEG Group JPEG library, Microsoft Windows Media SDK, or Apple QuickTime SDK +(or a modified version of those libraries), +containing parts covered by the terms of the Bink SDK EULA, 3ds Max EULA, +PhysX SDK EULA, DirectX SDK EULA, OpenSSL and SSLeay licenses, IJG +JPEG Library README, Windows Media SDK EULA, or QuickTime SDK EULA, the +licensors of this Program grant you additional +permission to convey the resulting work. Corresponding Source for a +non-source form of such a combination shall include the source code for +the parts of OpenSSL and IJG JPEG Library used as well as that of the covered +work. + +You can contact Cyan Worlds, Inc. by email legal@cyan.com + or by snail mail at: + Cyan Worlds, Inc. 
+ 14617 N Newport Hwy + Mead, WA 99021 + +*==LICENSE==*/ #include using namespace metal; @@ -12,7 +47,8 @@ using namespace metal; // ignoring the int and pi constants here and using whats built in // but reserving space for them in the buffer -typedef struct { +typedef struct +{ matrix_float4x4 Local2NDC; float4 intConstants; float4 time; @@ -26,14 +62,16 @@ typedef struct { float4 waveSpeed; } vs_GrassUniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; float4 color; float4 texCoord; } vs_GrassInOut; -vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], - constant vs_GrassUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], + constant vs_GrassUniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ vs_GrassInOut out; float4 r0 = (in.position.x * uniforms.waveDirX) + (in.position.y * uniforms.waveDirX); @@ -41,7 +79,7 @@ vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], r0 += (uniforms.time.x * uniforms.waveSpeed); // scale by speed and add to X,Y input r0 = fract(r0); - r0 = (r0 - 0.5) * M_PI_F * 2; + r0 = (r0 - 0.5f) * M_PI_F * 2.f; float4 pow2 = r0 * r0; float4 pow3 = pow2 * r0; @@ -60,19 +98,20 @@ vertex vs_GrassInOut vs_GrassShader(Vertex in [[stage_in]], dot(r0, uniforms.waveDistortZ) ); - offset *= (2.0 * (1.0 - in.texCoord1.y)); // mult by Y tex coord. So the waves only affect the top verts + offset *= (2.f * (1.f - in.texCoord1.y)); // mult by Y tex coord. 
So the waves only affect the top verts float4 position = float4(in.position.xyz + offset, 1); out.position = position * uniforms.Local2NDC; - out.color = float4(in.color.r, in.color.g, in.color.b, in.color.a) / 255.0; - out.texCoord = float4(in.texCoord1, 0.0); + out.color = float4(in.color.r, in.color.g, in.color.b, in.color.a) / 255.f; + out.texCoord = float4(in.texCoord1, 0.f); return out; } -fragment half4 ps_GrassShader(vs_GrassInOut in [[stage_in]], - texture2d t0 [[ texture(0) ]]) { +fragment half4 ps_GrassShader(vs_GrassInOut in [[stage_in]], + texture2d t0 [[ texture(0) ]]) +{ constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, @@ -80,7 +119,7 @@ fragment half4 ps_GrassShader(vs_GrassInOut in [[stage_in]], half4 out = t0.sample(colorSampler, in.texCoord.xy); out *= half4(in.color); - if (out.a <= 0.1) + if (out.a <= 0.1h) discard_fragment(); return out; } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal index 810ddb7355..fc4db3f84a 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/PlateShaders.metal @@ -52,9 +52,10 @@ using namespace metal; using namespace metal; -typedef struct { - array, 8> textures [[ id(FragmentShaderArgumentAttributeTextures) ]]; - array, 8> cubicTextures [[ id(FragmentShaderArgumentAttributeCubicTextures) ]]; +typedef struct +{ + array, 8> textures [[ id(FragmentShaderArgumentAttributeTextures) ]]; + array, 8> cubicTextures [[ id(FragmentShaderArgumentAttributeCubicTextures) ]]; } FragmentShaderArguments; typedef struct @@ -71,26 +72,26 @@ typedef struct } ColorInOut; vertex ColorInOut plateVertexShader(PlateVertex in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(VertexShaderArgumentFixedFunctionUniforms) ]], - uint v_id [[vertex_id]]) + constant VertexUniforms & uniforms [[ 
buffer(VertexShaderArgumentFixedFunctionUniforms) ]], + uint v_id [[ vertex_id ]]) { ColorInOut out; - float4 position = float4(in.position, 0.0, 1.0); + float4 position = float4(in.position, 0.f, 1.f); position = position * uniforms.projectionMatrix; out.position = ( position * uniforms.localToWorldMatrix); - out.position.y *= -1.0f; + out.position.y *= -1.f; out.texCoord = (float4(in.texCoord, 1.0) * uniforms.uvTransforms[0].transform).xyz; - out.texCoord.y = 1.0 - out.texCoord.y; - out.normal = float4(0.0, 0.0, 1.0, 0.0); + out.texCoord.y = 1.f - out.texCoord.y; + out.normal = float4(0.f, 0.f, 1.f, 0.f); return out; } fragment float4 fragmentShader(ColorInOut in [[stage_in]], - constant VertexUniforms & uniforms [[ buffer(VertexShaderArgumentFixedFunctionUniforms) ]], - constant float & alpha [[ buffer(6) ]], - texture2d colorMap [[ texture( FragmentShaderArgumentTexture) ]]) + constant VertexUniforms & uniforms [[ buffer(VertexShaderArgumentFixedFunctionUniforms) ]], + constant float & alpha [[ buffer(6) ]], + texture2d colorMap [[ texture(FragmentShaderArgumentTexture) ]]) { constexpr sampler colorSampler(mip_filter::linear, mag_filter::linear, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal index 89513a03eb..8d4b396082 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/TextFontShader.metal @@ -42,12 +42,10 @@ You can contact Cyan Worlds, Inc. 
by email legal@cyan.com #include using namespace metal; -// File for Metal kernel and shader functions #include #include -// Including header shared between this Metal shader code and Swift/C code executing Metal API commands #import "ShaderTypes.h" @@ -68,8 +66,8 @@ typedef struct half4 color; } ColorInOut; -vertex ColorInOut textFontVertexShader(constant Vertex *in [[ buffer(0) ]], - constant matrix_float4x4 & transform [[ buffer(1) ]], +vertex ColorInOut textFontVertexShader(constant Vertex *in [[ buffer(0) ]], + constant matrix_float4x4 & transform [[ buffer(1) ]], uint v_id [[vertex_id]]) { ColorInOut out; @@ -84,8 +82,8 @@ vertex ColorInOut textFontVertexShader(constant Vertex *in [[ buffer(0) ]], return out; } -fragment half4 textFontFragmentShader(ColorInOut in [[stage_in]], - texture2d colorMap [[ texture(0) ]]) +fragment half4 textFontFragmentShader(ColorInOut in [[stage_in]], + texture2d colorMap [[ texture(0) ]]) { constexpr sampler colorSampler(mip_filter::nearest, mag_filter::nearest, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal index c190044888..bbce3cfcaa 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDec1Lay_7.metal @@ -45,7 +45,8 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct +{ matrix_float4x4 WorldToNDC; float4 Frequency; float4 Phase; @@ -84,15 +85,17 @@ typedef struct { float4 DirYSqKW; // Only used by DecalEnv } vs_WaveDev1Lay_7Uniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; half4 c0; float4 texCoord0; half4 fog; } vs_WaveDev1Lay_7InOut; -vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], - constant vs_WaveDev1Lay_7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex 
in [[stage_in]], + constant vs_WaveDev1Lay_7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ vs_WaveDev1Lay_7InOut out; // Store our input position in world space in r6 float4 worldPosition = float4(0); @@ -270,8 +273,9 @@ vertex vs_WaveDev1Lay_7InOut vs_WaveDec1Lay_7(Vertex in [[stage_in]], return out; } -fragment half4 ps_CbaseAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], - texture2d texture [[ texture(0) ]]) { +fragment half4 ps_CbaseAbase(vs_WaveDev1Lay_7InOut in [[stage_in]], + texture2d texture [[ texture(0) ]]) +{ constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal index f74fe431fe..b70c05c335 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveDecEnv.metal @@ -45,7 +45,8 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct +{ matrix_float4x4 WorldToNDC; float4 Frequency; float4 Phase; @@ -84,7 +85,8 @@ typedef struct { float4 DirYSqKW; // Only used by DecalEnv } vs_WaveDecEnv7Uniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; float4 c1; float4 texCoord0; @@ -94,8 +96,9 @@ typedef struct { float fog; } vs_WaveDecEnv7InOut; -vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], - constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[ stage_in ]], + constant vs_WaveDecEnv7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ vs_WaveDecEnv7InOut out; // Store our input position in world space in r6 @@ -403,9 +406,10 @@ vertex vs_WaveDecEnv7InOut vs_WaveDecEnv_7(Vertex in [[stage_in]], return out; } -fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in 
[[stage_in]], - texture2d normalMap [[ texture(0) ]], - texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1) ]]) { +fragment float4 ps_WaveDecEnv(vs_WaveDecEnv7InOut in [[ stage_in ]], + texture2d normalMap [[ texture(0) ]], + texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 1) ]]) +{ // Very simular to ps_WaveFixed.inl. Only the final coloring is different. // Even though so far they are identical. diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal index 83885af0a7..1b59e2bed4 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveRip.metal @@ -45,7 +45,8 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct +{ matrix_float4x4 WorldToNDC; float4 FogSet; float4 Frequency; @@ -81,15 +82,17 @@ typedef struct { float4 RampBias; } vs_WaveRip7Uniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; half4 c1; float2 texCoord0; float fog; } waveRipInOut; -vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], - constant vs_WaveRip7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], + constant vs_WaveRip7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ waveRipInOut out; // Store our input position in world space in r6 @@ -289,8 +292,9 @@ vertex waveRipInOut vs_WaveRip7(Vertex in [[stage_in]], return out; } -fragment half4 ps_WaveRip(waveRipInOut in [[stage_in]], - texture2d texture [[ texture(0) ]]) { +fragment half4 ps_WaveRip(waveRipInOut in [[stage_in]], + texture2d texture [[ texture(0) ]]) +{ constexpr sampler colorSampler = sampler(mip_filter::linear, mag_filter::linear, min_filter::linear, diff --git 
a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal index f56e600f8e..d0efefcce6 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/ShaderSrc/WaveSet7.metal @@ -45,7 +45,8 @@ using namespace metal; #include "ShaderVertex.h" -typedef struct { +typedef struct +{ matrix_float4x4 WorldToNDC; float4 WaterTint; float4 Frequency; @@ -82,7 +83,8 @@ typedef struct { float4 DirYSqKW; } vs_WaveFixedFin7Uniforms; -typedef struct { +typedef struct +{ float4 position [[position]]; float4 c1; float4 c2; @@ -93,8 +95,9 @@ typedef struct { float fog; } vs_WaveFixedFin7InOut; -vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], - constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) { +vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], + constant vs_WaveFixedFin7Uniforms & uniforms [[ buffer(VertexShaderArgumentMaterialShaderUniforms) ]]) +{ vs_WaveFixedFin7InOut out; // Store our input position in world space in r6 @@ -437,9 +440,10 @@ vertex vs_WaveFixedFin7InOut vs_WaveFixedFin7(Vertex in [[stage_in]], return out; } -fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]], - texture2d normalMap [[ texture(0) ]], - texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3) ]]) { +fragment float4 ps_WaveFixed(vs_WaveFixedFin7InOut in [[stage_in]], + texture2d normalMap [[ texture(0) ]], + texturecube environmentMap [[ texture(FragmentShaderArgumentAttributeCubicTextures + 3) ]]) +{ // Short pixel shader. Use the texm3x3vspec to do a per-pixel // reflected lookup into our environment map. 
// Input: diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h index 8206757594..5d9dbd2321 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalDeviceRef.h @@ -289,7 +289,7 @@ class plMetalRenderTargetRef : public plMetalTextureRef void Link(plMetalRenderTargetRef** back) { plMetalDeviceRef::Link((plMetalDeviceRef**)back); } plMetalRenderTargetRef* GetNext() const { return (plMetalRenderTargetRef*)fNext; } - plMetalRenderTargetRef() : fDepthBuffer(nullptr) + plMetalRenderTargetRef() : fDepthBuffer() { } diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp index 85269bf520..725eb9eb74 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.cpp @@ -165,12 +165,13 @@ void plMetalMaterialShaderRef::FastEncodeArguments(MTL::RenderCommandEncoder* en encoder->setFragmentBuffer(fPassArgumentBuffers[pass], 0, FragmentShaderArgumentUniforms); } -void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encoder, - VertexUniforms* vertexUniforms, uint pass, +void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encoder, + VertexUniforms* vertexUniforms, + const uint pass, plMetalFragmentShaderDescription* passDescription, std::vector* piggyBacks, - std::function preEncodeTransform, - std::function postEncodeTransform) + const std::function preEncodeTransform, + const std::function postEncodeTransform) { std::vector layers = GetLayersForPass(pass); @@ -201,7 +202,7 @@ void plMetalMaterialShaderRef::EncodeArguments(MTL::RenderCommandEncoder* encode encoder->setFragmentBytes(&uniforms, sizeof(plMetalFragmentShaderArgumentBuffer), FragmentShaderArgumentUniforms); } -void 
plMetalMaterialShaderRef::EncodeTransform(plLayerInterface* layer, UVOutDescriptor* transform) +void plMetalMaterialShaderRef::EncodeTransform(const plLayerInterface* layer, UVOutDescriptor* transform) { matrix_float4x4 tXfm; hsMatrix2SIMD(layer->GetTransform(), &tXfm); @@ -307,7 +308,7 @@ const hsGMatState plMetalMaterialShaderRef::ICompositeLayerState(const plLayerIn return state; } -void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer) +void plMetalMaterialShaderRef::IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, const uint32_t offsetFromRootLayer, plLayerInterface* layer) { // Reminder: Encoder is allowed to be null when Plasma is precompiling pipeline states // Metal needs to know if a shader is 2D or Cubic to compile shaders @@ -411,7 +412,7 @@ bool plMetalMaterialShaderRef::ICanEatLayer(plLayerInterface* lay) return true; } -uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, +uint32_t plMetalMaterialShaderRef::IHandleMaterial(uint32_t layer, plMetalFragmentShaderDescription* passDescription, plMetalFragmentShaderArgumentBuffer* uniforms, std::vector* piggybacks, diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h index 0ab3eddd66..ae643e56cb 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalMaterialShaderRef.h @@ -80,16 +80,16 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef uint32_t GetNumPasses() const { return fNumPasses; } - uint32_t GetPassIndex(size_t which) const { return fPassIndices[which]; } + uint32_t GetPassIndex(const size_t which) const { return fPassIndices[which]; } const std::vector GetLayersForPass(size_t pass) const { return fPasses[pass]; } - void EncodeArguments(MTL::RenderCommandEncoder* encoder, + void 
EncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, - uint pass, + const uint pass, plMetalFragmentShaderDescription* passDescription, std::vector* piggyBacks, - std::function preEncodeTransform, - std::function postEncodeTransform); + const std::function preEncodeTransform, + const std::function postEncodeTransform); void FastEncodeArguments(MTL::RenderCommandEncoder* encoder, VertexUniforms* vertexUniforms, uint pass); // probably not a good idea to call prepareTextures directly // mostly just a hack to keep plates working for now @@ -119,8 +119,8 @@ class plMetalMaterialShaderRef : public plMetalDeviceRef bool ICanEatLayer(plLayerInterface* lay); uint32_t ILayersAtOnce(uint32_t which); - void IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, uint32_t offsetFromRootLayer, plLayerInterface* layer); - void EncodeTransform(plLayerInterface* layer, UVOutDescriptor *transform); + void IBuildLayerTexture(MTL::RenderCommandEncoder* encoder, const uint32_t offsetFromRootLayer, plLayerInterface* layer); + void EncodeTransform(const plLayerInterface* layer, UVOutDescriptor *transform); std::vector> fPasses; std::vector fFragmentShaderDescriptions; }; diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h index 2d9541780f..4d819490c7 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPipeline.h @@ -102,7 +102,7 @@ class plMetalPipeline : public pl3DPipeline public: plMetalPipeline(hsWindowHndl display, hsWindowHndl window, const hsG3DDeviceModeRecord* devMode); - virtual ~plMetalPipeline(); + ~plMetalPipeline(); CLASSNAME_REGISTER(plMetalPipeline); GETINTERFACE_ANY(plMetalPipeline, plPipeline); @@ -201,12 +201,12 @@ class plMetalPipeline : public pl3DPipeline plLayerInterface* IPushOverAllLayer(plLayerInterface* li); plLayerInterface* IPopOverAllLayer(plLayerInterface* li); - void 
IPushPiggyBacks(hsGMaterial* mat); - void IPopPiggyBacks(); - void IPushProjPiggyBack(plLayerInterface* li); - void IPopProjPiggyBacks(); + void IPushPiggyBacks(hsGMaterial* mat); + void IPopPiggyBacks(); + void IPushProjPiggyBack(plLayerInterface* li); + void IPopProjPiggyBacks(); size_t ISetNumActivePiggyBacks(); - bool ICheckAuxBuffers(const plAuxSpan* span); + bool ICheckAuxBuffers(const plAuxSpan* span); void ISetPipeConsts(plShader* shader); bool ISetShaders(const plMetalVertexBufferRef* vRef, const hsGMatState blendMode, plShader* vShader, plShader* pShader); diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp index 82db658b66..9f6b1a387b 100644 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp +++ b/Sources/Plasma/FeatureLib/pfMetalPipeline/plMetalPlateManager.cpp @@ -49,7 +49,7 @@ You can contact Cyan Worlds, Inc. by email legal@cyan.com plMetalPlateManager::plMetalPlateManager(plMetalPipeline* pipe) : plPlateManager(pipe), - fVtxBuffer(nullptr) + fVtxBuffer() { MTL::DepthStencilDescriptor *depthDescriptor = MTL::DepthStencilDescriptor::alloc()->init(); depthDescriptor->setDepthCompareFunction(MTL::CompareFunctionAlways); From 245b00f653e6799dee26fd7766057574cd42f5e5 Mon Sep 17 00:00:00 2001 From: Colin Cornaby Date: Tue, 28 Nov 2023 15:35:34 -0800 Subject: [PATCH 165/165] Removing clang format file --- .../FeatureLib/pfMetalPipeline/.clang-format | 70 ------------------- 1 file changed, 70 deletions(-) delete mode 100644 Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format diff --git a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format b/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format deleted file mode 100644 index 76d0387f80..0000000000 --- a/Sources/Plasma/FeatureLib/pfMetalPipeline/.clang-format +++ /dev/null @@ -1,70 +0,0 @@ ---- -BasedOnStyle: Google ---- -Language: Cpp -# Modifications to the style for Plasma go here 
-IndentWidth: 4 -AccessModifierOffset: -4 -ColumnLimit: 0 -BreakBeforeBraces: Custom -BraceWrapping: - AfterClass: true - AfterControlStatement: Never - AfterEnum: true - AfterFunction: true - AfterNamespace: true - AfterObjCDeclaration: true - AfterStruct: true - AfterUnion: true - BeforeCatch: false - BeforeElse: false - IndentBraces: false -IndentAccessModifiers: false -AlignConsecutiveDeclarations: AcrossComments -AlignConsecutiveAssignments: - Enabled: False -ReflowComments: true -LambdaBodyIndentation: OuterScope -AllowShortBlocksOnASingleLine: true -AllowShortFunctionsOnASingleLine: true -AllowShortIfStatementsOnASingleLine: true -PackConstructorInitializers: CurrentLine -ObjCBlockIndentWidth: 4 -SpacesInLineCommentPrefix: - Minimum: 1 - Maximum: -1 -SpacesBeforeTrailingComments: 1 ---- -Language: ObjC -# Obj-C specific settings go here -IndentWidth: 4 -AccessModifierOffset: -4 -ColumnLimit: 0 -BreakBeforeBraces: Custom -BraceWrapping: - AfterClass: true - AfterControlStatement: Never - AfterEnum: true - AfterFunction: true - AfterNamespace: true - AfterObjCDeclaration: true - AfterStruct: true - AfterUnion: true - BeforeCatch: false - BeforeElse: false - IndentBraces: false -IndentAccessModifiers: false -AlignConsecutiveDeclarations: AcrossComments -AlignConsecutiveAssignments: - Enabled: False -ReflowComments: true -LambdaBodyIndentation: OuterScope -AllowShortBlocksOnASingleLine: true -AllowShortFunctionsOnASingleLine: true -AllowShortIfStatementsOnASingleLine: true -PackConstructorInitializers: CurrentLine -ObjCBlockIndentWidth: 4 -SpacesInLineCommentPrefix: - Minimum: 1 - Maximum: -1 -SpacesBeforeTrailingComments: 1