From 394ea653aacd131c234596299ebf79a52580c9b9 Mon Sep 17 00:00:00 2001 From: BlueCube3310 <53150244+BlueCube3310@users.noreply.github.com> Date: Thu, 4 Jan 2024 16:33:20 +0100 Subject: [PATCH] Add Betsy to speed up BC6 compression --- core/io/image.cpp | 23 + core/io/image.h | 2 + doc/classes/ProjectSettings.xml | 5 + .../resource_importer_layered_texture.cpp | 2 - .../CrossPlatformSettings_piece_all.glsl | 76 ++ modules/betsy/SCsub | 24 + modules/betsy/UavCrossPlatform_piece_all.glsl | 17 + modules/betsy/bc6h.glsl | 653 ++++++++++++++++++ modules/betsy/config.py | 6 + modules/betsy/image_compress_betsy.cpp | 354 ++++++++++ modules/betsy/image_compress_betsy.h | 44 ++ modules/betsy/register_types.cpp | 47 ++ modules/betsy/register_types.h | 39 ++ modules/cvtt/image_compress_cvtt.cpp | 5 + servers/rendering_server.cpp | 1 + thirdparty/README.md | 11 + thirdparty/betsy/LICENSE.md | 18 + 17 files changed, 1325 insertions(+), 2 deletions(-) create mode 100644 modules/betsy/CrossPlatformSettings_piece_all.glsl create mode 100644 modules/betsy/SCsub create mode 100644 modules/betsy/UavCrossPlatform_piece_all.glsl create mode 100644 modules/betsy/bc6h.glsl create mode 100644 modules/betsy/config.py create mode 100644 modules/betsy/image_compress_betsy.cpp create mode 100644 modules/betsy/image_compress_betsy.h create mode 100644 modules/betsy/register_types.cpp create mode 100644 modules/betsy/register_types.h create mode 100644 thirdparty/betsy/LICENSE.md diff --git a/core/io/image.cpp b/core/io/image.cpp index b35d405662e6..72a6be392eff 100644 --- a/core/io/image.cpp +++ b/core/io/image.cpp @@ -30,6 +30,7 @@ #include "image.h" +#include "core/config/project_settings.h" #include "core/error/error_list.h" #include "core/error/error_macros.h" #include "core/io/image_loader.h" @@ -2734,6 +2735,27 @@ Error Image::compress(CompressMode p_mode, CompressSource p_source, ASTCFormat p Error Image::compress_from_channels(CompressMode p_mode, UsedChannels p_channels, ASTCFormat 
p_astc_format) { ERR_FAIL_COND_V(data.is_empty(), ERR_INVALID_DATA); + // RenderingDevice only. + if (GLOBAL_GET("rendering/textures/vram_compression/compress_with_gpu")) { + switch (p_mode) { + case COMPRESS_BPTC: { + // BC7 is unsupported currently. + if ((format >= FORMAT_RF && format <= FORMAT_RGBE9995) && _image_compress_bptc_rd_func) { + Error result = _image_compress_bptc_rd_func(this, p_channels); + + // If the image was compressed successfully, we return here. If not, we fall back to the default compression scheme. + if (result == OK) { + return OK; + } + } + + } break; + + default: { + } + } + } + switch (p_mode) { case COMPRESS_S3TC: { ERR_FAIL_NULL_V(_image_compress_bc_func, ERR_UNAVAILABLE); @@ -3115,6 +3137,7 @@ void (*Image::_image_compress_bptc_func)(Image *, Image::UsedChannels) = nullptr void (*Image::_image_compress_etc1_func)(Image *) = nullptr; void (*Image::_image_compress_etc2_func)(Image *, Image::UsedChannels) = nullptr; void (*Image::_image_compress_astc_func)(Image *, Image::ASTCFormat) = nullptr; +Error (*Image::_image_compress_bptc_rd_func)(Image *, Image::UsedChannels) = nullptr; void (*Image::_image_decompress_bc)(Image *) = nullptr; void (*Image::_image_decompress_bptc)(Image *) = nullptr; void (*Image::_image_decompress_etc1)(Image *) = nullptr; diff --git a/core/io/image.h b/core/io/image.h index d55cc39dbb57..18b8c99f694d 100644 --- a/core/io/image.h +++ b/core/io/image.h @@ -159,6 +159,8 @@ class Image : public Resource { static void (*_image_compress_etc2_func)(Image *, UsedChannels p_channels); static void (*_image_compress_astc_func)(Image *, ASTCFormat p_format); + static Error (*_image_compress_bptc_rd_func)(Image *, UsedChannels p_channels); + static void (*_image_decompress_bc)(Image *); static void (*_image_decompress_bptc)(Image *); static void (*_image_decompress_etc1)(Image *); diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 8d567f347a8f..838fef48fc91 100644 --- 
a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -2875,6 +2875,11 @@ If [code]true[/code], the texture importer will import lossless textures using the PNG format. Otherwise, it will default to using WebP. + + If [code]true[/code], the texture importer will use the GPU to compress textures, which makes importing large textures significantly faster. + [b]Note:[/b] This setting requires either Vulkan or D3D12 to be available as a rendering backend. + [b]Note:[/b] Currently, this only affects BC6H compression, which is used on Desktop and Console for HDR images. + If [code]true[/code], the texture importer will import VRAM-compressed textures using the Ericsson Texture Compression 2 algorithm for lower quality textures and normal maps and Adaptable Scalable Texture Compression algorithm for high quality textures (in 4×4 block size). [b]Note:[/b] This setting is an override. The texture importer will always import the format the host platform needs, even if this is set to [code]false[/code]. 
diff --git a/editor/import/resource_importer_layered_texture.cpp b/editor/import/resource_importer_layered_texture.cpp index 46882e95cb6f..212a4160bfc3 100644 --- a/editor/import/resource_importer_layered_texture.cpp +++ b/editor/import/resource_importer_layered_texture.cpp @@ -341,8 +341,6 @@ Error ResourceImporterLayeredTexture::import(const String &p_source_file, const } if (compress_mode == COMPRESS_VRAM_COMPRESSED) { - mipmaps = true; - //if using video ram, optimize if (channel_pack == 0) { //remove alpha if not needed, so compression is more efficient diff --git a/modules/betsy/CrossPlatformSettings_piece_all.glsl b/modules/betsy/CrossPlatformSettings_piece_all.glsl new file mode 100644 index 000000000000..b7abac7fcc43 --- /dev/null +++ b/modules/betsy/CrossPlatformSettings_piece_all.glsl @@ -0,0 +1,76 @@ + +#define min3(a, b, c) min(a, min(b, c)) +#define max3(a, b, c) max(a, max(b, c)) + +#define float2 vec2 +#define float3 vec3 +#define float4 vec4 + +#define int2 ivec2 +#define int3 ivec3 +#define int4 ivec4 + +#define uint2 uvec2 +#define uint3 uvec3 +#define uint4 uvec4 + +#define float2x2 mat2 +#define float3x3 mat3 +#define float4x4 mat4 +#define ogre_float4x3 mat3x4 + +#define ushort uint +#define ushort3 uint3 +#define ushort4 uint4 + +//Short used for read operations. It's an int in GLSL & HLSL. An ushort in Metal +#define rshort int +#define rshort2 int2 +#define rint int +//Short used for write operations. It's an int in GLSL. 
An ushort in HLSL & Metal +#define wshort2 int2 +#define wshort3 int3 + +#define toFloat3x3(x) mat3(x) +#define buildFloat3x3(row0, row1, row2) mat3(row0, row1, row2) + +#define mul(x, y) ((x) * (y)) +#define saturate(x) clamp((x), 0.0, 1.0) +#define lerp mix +#define rsqrt inversesqrt +#define INLINE +#define NO_INTERPOLATION_PREFIX flat +#define NO_INTERPOLATION_SUFFIX + +#define PARAMS_ARG_DECL +#define PARAMS_ARG + +#define reversebits bitfieldReverse + +#define OGRE_Sample(tex, sampler, uv) texture(tex, uv) +#define OGRE_SampleLevel(tex, sampler, uv, lod) textureLod(tex, uv, lod) +#define OGRE_SampleArray2D(tex, sampler, uv, arrayIdx) texture(tex, vec3(uv, arrayIdx)) +#define OGRE_SampleArray2DLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec3(uv, arrayIdx), lod) +#define OGRE_SampleArrayCubeLevel(tex, sampler, uv, arrayIdx, lod) textureLod(tex, vec4(uv, arrayIdx), lod) +#define OGRE_SampleGrad(tex, sampler, uv, ddx, ddy) textureGrad(tex, uv, ddx, ddy) +#define OGRE_SampleArray2DGrad(tex, sampler, uv, arrayIdx, ddx, ddy) textureGrad(tex, vec3(uv, arrayIdx), ddx, ddy) +#define OGRE_ddx(val) dFdx(val) +#define OGRE_ddy(val) dFdy(val) +#define OGRE_Load2D(tex, iuv, lod) texelFetch(tex, iuv, lod) +#define OGRE_LoadArray2D(tex, iuv, arrayIdx, lod) texelFetch(tex, ivec3(iuv, arrayIdx), lod) +#define OGRE_Load2DMS(tex, iuv, subsample) texelFetch(tex, iuv, subsample) + +#define OGRE_Load3D(tex, iuv, lod) texelFetch(tex, ivec3(iuv), lod) + +#define OGRE_GatherRed(tex, sampler, uv) textureGather(tex, uv, 0) +#define OGRE_GatherGreen(tex, sampler, uv) textureGather(tex, uv, 1) +#define OGRE_GatherBlue(tex, sampler, uv) textureGather(tex, uv, 2) + +#define bufferFetch1(buffer, idx) texelFetch(buffer, idx).x + +#define OGRE_SAMPLER_ARG_DECL(samplerName) +#define OGRE_SAMPLER_ARG(samplerName) + +#define OGRE_Texture3D_float4 sampler3D +#define OGRE_OUT_REF(declType, variableName) out declType variableName +#define OGRE_INOUT_REF(declType, variableName) inout 
declType variableName diff --git a/modules/betsy/SCsub b/modules/betsy/SCsub new file mode 100644 index 000000000000..9930e1f4cf96 --- /dev/null +++ b/modules/betsy/SCsub @@ -0,0 +1,24 @@ +#!/usr/bin/env python +Import("env") +Import("env_modules") + +env_betsy = env_modules.Clone() +env_betsy.GLSL_HEADER("bc6h.glsl") +env_betsy.Depends(Glob("*.glsl.gen.h"), ["#glsl_builders.py"]) + +# Thirdparty source files +thirdparty_obj = [] +thirdparty_dir = "#thirdparty/betsy/" +env_betsy.Prepend(CPPPATH=[thirdparty_dir]) + +env_thirdparty = env_betsy.Clone() +env_thirdparty.disable_warnings() +env.modules_sources += thirdparty_obj + +# Godot source files +module_obj = [] +env_betsy.add_source_files(module_obj, "*.cpp") +env.modules_sources += module_obj + +# Needed to force rebuilding the module files when the thirdparty library is updated. +env.Depends(module_obj, thirdparty_obj) diff --git a/modules/betsy/UavCrossPlatform_piece_all.glsl b/modules/betsy/UavCrossPlatform_piece_all.glsl new file mode 100644 index 000000000000..30854df637c5 --- /dev/null +++ b/modules/betsy/UavCrossPlatform_piece_all.glsl @@ -0,0 +1,17 @@ + +#define OGRE_imageLoad2D(inImage, iuv) imageLoad(inImage, int2(iuv)) +#define OGRE_imageLoad2DArray(inImage, iuvw) imageLoad(inImage, int3(iuvw)) + +#define OGRE_imageWrite2D1(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0, 0)) +#define OGRE_imageWrite2D2(outImage, iuv, value) imageStore(outImage, int2(iuv), float4(value, 0, 0)) +#define OGRE_imageWrite2D4(outImage, iuv, value) imageStore(outImage, int2(iuv), value) + +#define OGRE_imageLoad3D(inImage, iuv) imageLoad(inImage, int3(iuv)) + +#define OGRE_imageWrite3D1(outImage, iuv, value) imageStore(outImage, int3(iuv), value) +#define OGRE_imageWrite3D4(outImage, iuv, value) imageStore(outImage, int3(iuv), value) + +#define OGRE_imageWrite2DArray1(outImage, iuvw, value) imageStore(outImage, int3(iuvw), value) +#define OGRE_imageWrite2DArray4(outImage, iuvw, value) 
imageStore(outImage, int3(iuvw), value) + +//#define sharedOnlyBarrier memoryBarrierShared();barrier(); diff --git a/modules/betsy/bc6h.glsl b/modules/betsy/bc6h.glsl new file mode 100644 index 000000000000..0d10d378fd80 --- /dev/null +++ b/modules/betsy/bc6h.glsl @@ -0,0 +1,653 @@ +#[versions] + +signed = "#define SIGNED"; +unsigned = ""; + +#[compute] +#version 450 + +#include "CrossPlatformSettings_piece_all.glsl" +#include "UavCrossPlatform_piece_all.glsl" + +#VERSION_DEFINES +#define QUALITY + +//SIGNED macro is WIP +//#define SIGNED + +float3 f32tof16(float3 value) { + return float3(packHalf2x16(float2(value.x, 0.0)), + packHalf2x16(float2(value.y, 0.0)), + packHalf2x16(float2(value.z, 0.0))); +} + +float3 f16tof32(uint3 value) { + return float3(unpackHalf2x16(value.x).x, + unpackHalf2x16(value.y).x, + unpackHalf2x16(value.z).x); +} + +float f32tof16(float value) { + return packHalf2x16(float2(value.x, 0.0)); +} + +float f16tof32(uint value) { + return unpackHalf2x16(value.x).x; +} + +layout(binding = 0) uniform sampler2D srcTexture; +layout(binding = 1, rgba32ui) uniform restrict writeonly uimage2D dstTexture; + +layout(push_constant, std430) uniform Params { + float2 p_textureSizeRcp; + uint padding0; + uint padding1; +} +params; + +const float HALF_MAX = 65504.0f; +const uint PATTERN_NUM = 32u; + +float CalcMSLE(float3 a, float3 b) { + float3 err = log2((b + 1.0f) / (a + 1.0f)); + err = err * err; + return err.x + err.y + err.z; +} + +uint PatternFixupID(uint i) { + uint ret = 15u; + ret = ((3441033216u >> i) & 0x1u) != 0 ? 2u : ret; + ret = ((845414400u >> i) & 0x1u) != 0 ? 8u : ret; + return ret; +} + +uint Pattern(uint p, uint i) { + uint p2 = p / 2u; + uint p3 = p - p2 * 2u; + + uint enc = 0u; + enc = p2 == 0u ? 2290666700u : enc; + enc = p2 == 1u ? 3972591342u : enc; + enc = p2 == 2u ? 4276930688u : enc; + enc = p2 == 3u ? 3967876808u : enc; + enc = p2 == 4u ? 4293707776u : enc; + enc = p2 == 5u ? 3892379264u : enc; + enc = p2 == 6u ? 
4278255592u : enc; + enc = p2 == 7u ? 4026597360u : enc; + enc = p2 == 8u ? 9369360u : enc; + enc = p2 == 9u ? 147747072u : enc; + enc = p2 == 10u ? 1930428556u : enc; + enc = p2 == 11u ? 2362323200u : enc; + enc = p2 == 12u ? 823134348u : enc; + enc = p2 == 13u ? 913073766u : enc; + enc = p2 == 14u ? 267393000u : enc; + enc = p2 == 15u ? 966553998u : enc; + + enc = p3 != 0u ? enc >> 16u : enc; + uint ret = (enc >> i) & 0x1u; + return ret; +} + +#ifndef SIGNED +//UF +float3 Quantize7(float3 x) { + return (f32tof16(x) * 128.0f) / (0x7bff + 1.0f); +} + +float3 Quantize9(float3 x) { + return (f32tof16(x) * 512.0f) / (0x7bff + 1.0f); +} + +float3 Quantize10(float3 x) { + return (f32tof16(x) * 1024.0f) / (0x7bff + 1.0f); +} + +float3 Unquantize7(float3 x) { + return (x * 65536.0f + 0x8000) / 128.0f; +} + +float3 Unquantize9(float3 x) { + return (x * 65536.0f + 0x8000) / 512.0f; +} + +float3 Unquantize10(float3 x) { + return (x * 65536.0f + 0x8000) / 1024.0f; +} + +float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { + float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 4096.0f); + return f16tof32(uint3(comp)); +} +#else +//SF + +float3 cmpSign(float3 value) { + float3 signVal; + signVal.x = value.x >= 0.0f ? 1.0f : -1.0f; + signVal.y = value.y >= 0.0f ? 1.0f : -1.0f; + signVal.z = value.z >= 0.0f ? 1.0f : -1.0f; + return signVal; +} + +float3 Quantize7(float3 x) { + float3 signVal = cmpSign(x); + return signVal * (f32tof16(abs(x)) * 64.0f) / (0x7bff + 1.0f); +} + +float3 Quantize9(float3 x) { + float3 signVal = cmpSign(x); + return signVal * (f32tof16(abs(x)) * 256.0f) / (0x7bff + 1.0f); +} + +float3 Quantize10(float3 x) { + float3 signVal = cmpSign(x); + return signVal * (f32tof16(abs(x)) * 512.0f) / (0x7bff + 1.0f); +} + +float3 Unquantize7(float3 x) { + float3 signVal = sign(x); + x = abs(x); + float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 64.0f; + finalVal.x = x.x >= 64.0f ? 
32767.0 : finalVal.x; + finalVal.y = x.y >= 64.0f ? 32767.0 : finalVal.y; + finalVal.z = x.z >= 64.0f ? 32767.0 : finalVal.z; + return finalVal; +} + +float3 Unquantize9(float3 x) { + float3 signVal = sign(x); + x = abs(x); + float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 256.0f; + finalVal.x = x.x >= 256.0f ? 32767.0 : finalVal.x; + finalVal.y = x.y >= 256.0f ? 32767.0 : finalVal.y; + finalVal.z = x.z >= 256.0f ? 32767.0 : finalVal.z; + return finalVal; +} + +float3 Unquantize10(float3 x) { + float3 signVal = sign(x); + x = abs(x); + float3 finalVal = signVal * (x * 32768.0f + 0x4000) / 512.0f; + finalVal.x = x.x >= 512.0f ? 32767.0 : finalVal.x; + finalVal.y = x.y >= 512.0f ? 32767.0 : finalVal.y; + finalVal.z = x.z >= 512.0f ? 32767.0 : finalVal.z; + return finalVal; +} + +float3 FinishUnquantize(float3 endpoint0Unq, float3 endpoint1Unq, float weight) { + float3 comp = (endpoint0Unq * (64.0f - weight) + endpoint1Unq * weight + 32.0f) * (31.0f / 2048.0f); + /*float3 signVal; + signVal.x = comp.x >= 0.0f ? 0.0f : 0x8000; + signVal.y = comp.y >= 0.0f ? 0.0f : 0x8000; + signVal.z = comp.z >= 0.0f ? 0.0f : 0x8000;*/ + //return f16tof32( uint3( signVal + abs( comp ) ) ); + return f16tof32(uint3(comp)); +} +#endif + +void Swap(inout float3 a, inout float3 b) { + float3 tmp = a; + a = b; + b = tmp; +} + +void Swap(inout float a, inout float b) { + float tmp = a; + a = b; + b = tmp; +} + +uint ComputeIndex3(float texelPos, float endPoint0Pos, float endPoint1Pos) { + float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos); + return uint(clamp(r * 6.98182f + 0.00909f + 0.5f, 0.0f, 7.0f)); +} + +uint ComputeIndex4(float texelPos, float endPoint0Pos, float endPoint1Pos) { + float r = (texelPos - endPoint0Pos) / (endPoint1Pos - endPoint0Pos); + return uint(clamp(r * 14.93333f + 0.03333f + 0.5f, 0.0f, 15.0f)); +} + +void SignExtend(inout float3 v1, uint mask, uint signFlag) { + int3 v = int3(v1); + v.x = (v.x & int(mask)) | (v.x < 0 ? 
int(signFlag) : 0); + v.y = (v.y & int(mask)) | (v.y < 0 ? int(signFlag) : 0); + v.z = (v.z & int(mask)) | (v.z < 0 ? int(signFlag) : 0); + v1 = v; +} + +void EncodeP1(inout uint4 block, inout float blockMSLE, float3 texels[16]) { + // compute endpoints (min/max RGB bbox) + float3 blockMin = texels[0]; + float3 blockMax = texels[0]; + for (uint i = 1u; i < 16u; ++i) { + blockMin = min(blockMin, texels[i]); + blockMax = max(blockMax, texels[i]); + } + + // refine endpoints in log2 RGB space + float3 refinedBlockMin = blockMax; + float3 refinedBlockMax = blockMin; + for (uint i = 0u; i < 16u; ++i) { + refinedBlockMin = min(refinedBlockMin, texels[i] == blockMin ? refinedBlockMin : texels[i]); + refinedBlockMax = max(refinedBlockMax, texels[i] == blockMax ? refinedBlockMax : texels[i]); + } + + float3 logBlockMax = log2(blockMax + 1.0f); + float3 logBlockMin = log2(blockMin + 1.0f); + float3 logRefinedBlockMax = log2(refinedBlockMax + 1.0f); + float3 logRefinedBlockMin = log2(refinedBlockMin + 1.0f); + float3 logBlockMaxExt = (logBlockMax - logBlockMin) * (1.0f / 32.0f); + logBlockMin += min(logRefinedBlockMin - logBlockMin, logBlockMaxExt); + logBlockMax -= min(logBlockMax - logRefinedBlockMax, logBlockMaxExt); + blockMin = exp2(logBlockMin) - 1.0f; + blockMax = exp2(logBlockMax) - 1.0f; + + float3 blockDir = blockMax - blockMin; + blockDir = blockDir / (blockDir.x + blockDir.y + blockDir.z); + + float3 endpoint0 = Quantize10(blockMin); + float3 endpoint1 = Quantize10(blockMax); + float endPoint0Pos = f32tof16(dot(blockMin, blockDir)); + float endPoint1Pos = f32tof16(dot(blockMax, blockDir)); + + // check if endpoint swap is required + float fixupTexelPos = f32tof16(dot(texels[0], blockDir)); + uint fixupIndex = ComputeIndex4(fixupTexelPos, endPoint0Pos, endPoint1Pos); + if (fixupIndex > 7) { + Swap(endPoint0Pos, endPoint1Pos); + Swap(endpoint0, endpoint1); + } + + // compute indices + uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 
0u }; + for (uint i = 0u; i < 16u; ++i) { + float texelPos = f32tof16(dot(texels[i], blockDir)); + indices[i] = ComputeIndex4(texelPos, endPoint0Pos, endPoint1Pos); + } + + // compute compression error (MSLE) + float3 endpoint0Unq = Unquantize10(endpoint0); + float3 endpoint1Unq = Unquantize10(endpoint1); + float msle = 0.0f; + for (uint i = 0u; i < 16u; ++i) { + float weight = floor((indices[i] * 64.0f) / 15.0f + 0.5f); + float3 texelUnc = FinishUnquantize(endpoint0Unq, endpoint1Unq, weight); + + msle += CalcMSLE(texels[i], texelUnc); + } + + // encode block for mode 11 + blockMSLE = msle; + block.x = 0x03; + + // endpoints + block.x |= uint(endpoint0.x) << 5u; + block.x |= uint(endpoint0.y) << 15u; + block.x |= uint(endpoint0.z) << 25u; + block.y |= uint(endpoint0.z) >> 7u; + block.y |= uint(endpoint1.x) << 3u; + block.y |= uint(endpoint1.y) << 13u; + block.y |= uint(endpoint1.z) << 23u; + block.z |= uint(endpoint1.z) >> 9u; + + // indices + block.z |= indices[0] << 1u; + block.z |= indices[1] << 4u; + block.z |= indices[2] << 8u; + block.z |= indices[3] << 12u; + block.z |= indices[4] << 16u; + block.z |= indices[5] << 20u; + block.z |= indices[6] << 24u; + block.z |= indices[7] << 28u; + block.w |= indices[8] << 0u; + block.w |= indices[9] << 4u; + block.w |= indices[10] << 8u; + block.w |= indices[11] << 12u; + block.w |= indices[12] << 16u; + block.w |= indices[13] << 20u; + block.w |= indices[14] << 24u; + block.w |= indices[15] << 28u; +} + +float DistToLineSq(float3 PointOnLine, float3 LineDirection, float3 Point) { + float3 w = Point - PointOnLine; + float3 x = w - dot(w, LineDirection) * LineDirection; + + return dot(x, x); +} + +float EvaluateP2Pattern(uint pattern, float3 texels[16]) { + float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f); + float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f); + + for (uint i = 0; i < 16; ++i) { + uint paletteID = 
Pattern(pattern, i); + if (paletteID == 0) { + p0BlockMin = min(p0BlockMin, texels[i]); + p0BlockMax = max(p0BlockMax, texels[i]); + } else { + p1BlockMin = min(p1BlockMin, texels[i]); + p1BlockMax = max(p1BlockMax, texels[i]); + } + } + + float3 p0BlockDir = normalize(p0BlockMax - p0BlockMin); + float3 p1BlockDir = normalize(p1BlockMax - p1BlockMin); + + float sqDistanceFromLine = 0.0f; + + for (uint i = 0; i < 16; ++i) { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) { + sqDistanceFromLine += DistToLineSq(p0BlockMin, p0BlockDir, texels[i]); + } else { + sqDistanceFromLine += DistToLineSq(p1BlockMin, p1BlockDir, texels[i]); + } + } + + return sqDistanceFromLine; +} + +void EncodeP2Pattern(inout uint4 block, inout float blockMSLE, uint pattern, float3 texels[16]) { + float3 p0BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p0BlockMax = float3(0.0f, 0.0f, 0.0f); + float3 p1BlockMin = float3(HALF_MAX, HALF_MAX, HALF_MAX); + float3 p1BlockMax = float3(0.0f, 0.0f, 0.0f); + + for (uint i = 0u; i < 16u; ++i) { + uint paletteID = Pattern(pattern, i); + if (paletteID == 0) { + p0BlockMin = min(p0BlockMin, texels[i]); + p0BlockMax = max(p0BlockMax, texels[i]); + } else { + p1BlockMin = min(p1BlockMin, texels[i]); + p1BlockMax = max(p1BlockMax, texels[i]); + } + } + + float3 p0BlockDir = p0BlockMax - p0BlockMin; + float3 p1BlockDir = p1BlockMax - p1BlockMin; + p0BlockDir = p0BlockDir / (p0BlockDir.x + p0BlockDir.y + p0BlockDir.z); + p1BlockDir = p1BlockDir / (p1BlockDir.x + p1BlockDir.y + p1BlockDir.z); + + float p0Endpoint0Pos = f32tof16(dot(p0BlockMin, p0BlockDir)); + float p0Endpoint1Pos = f32tof16(dot(p0BlockMax, p0BlockDir)); + float p1Endpoint0Pos = f32tof16(dot(p1BlockMin, p1BlockDir)); + float p1Endpoint1Pos = f32tof16(dot(p1BlockMax, p1BlockDir)); + + uint fixupID = PatternFixupID(pattern); + float p0FixupTexelPos = f32tof16(dot(texels[0], p0BlockDir)); + float p1FixupTexelPos = f32tof16(dot(texels[fixupID], p1BlockDir)); + uint p0FixupIndex 
= ComputeIndex3(p0FixupTexelPos, p0Endpoint0Pos, p0Endpoint1Pos); + uint p1FixupIndex = ComputeIndex3(p1FixupTexelPos, p1Endpoint0Pos, p1Endpoint1Pos); + if (p0FixupIndex > 3u) { + Swap(p0Endpoint0Pos, p0Endpoint1Pos); + Swap(p0BlockMin, p0BlockMax); + } + if (p1FixupIndex > 3u) { + Swap(p1Endpoint0Pos, p1Endpoint1Pos); + Swap(p1BlockMin, p1BlockMax); + } + + uint indices[16] = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u }; + for (uint i = 0u; i < 16u; ++i) { + float p0TexelPos = f32tof16(dot(texels[i], p0BlockDir)); + float p1TexelPos = f32tof16(dot(texels[i], p1BlockDir)); + uint p0Index = ComputeIndex3(p0TexelPos, p0Endpoint0Pos, p0Endpoint1Pos); + uint p1Index = ComputeIndex3(p1TexelPos, p1Endpoint0Pos, p1Endpoint1Pos); + + uint paletteID = Pattern(pattern, i); + indices[i] = paletteID == 0u ? p0Index : p1Index; + } + + float3 endpoint760 = floor(Quantize7(p0BlockMin)); + float3 endpoint761 = floor(Quantize7(p0BlockMax)); + float3 endpoint762 = floor(Quantize7(p1BlockMin)); + float3 endpoint763 = floor(Quantize7(p1BlockMax)); + + float3 endpoint950 = floor(Quantize9(p0BlockMin)); + float3 endpoint951 = floor(Quantize9(p0BlockMax)); + float3 endpoint952 = floor(Quantize9(p1BlockMin)); + float3 endpoint953 = floor(Quantize9(p1BlockMax)); + + endpoint761 = endpoint761 - endpoint760; + endpoint762 = endpoint762 - endpoint760; + endpoint763 = endpoint763 - endpoint760; + + endpoint951 = endpoint951 - endpoint950; + endpoint952 = endpoint952 - endpoint950; + endpoint953 = endpoint953 - endpoint950; + + int maxVal76 = 0x1F; + endpoint761 = clamp(endpoint761, -maxVal76, maxVal76); + endpoint762 = clamp(endpoint762, -maxVal76, maxVal76); + endpoint763 = clamp(endpoint763, -maxVal76, maxVal76); + + int maxVal95 = 0xF; + endpoint951 = clamp(endpoint951, -maxVal95, maxVal95); + endpoint952 = clamp(endpoint952, -maxVal95, maxVal95); + endpoint953 = clamp(endpoint953, -maxVal95, maxVal95); + + float3 endpoint760Unq = Unquantize7(endpoint760); + float3 
endpoint761Unq = Unquantize7(endpoint760 + endpoint761); + float3 endpoint762Unq = Unquantize7(endpoint760 + endpoint762); + float3 endpoint763Unq = Unquantize7(endpoint760 + endpoint763); + float3 endpoint950Unq = Unquantize9(endpoint950); + float3 endpoint951Unq = Unquantize9(endpoint950 + endpoint951); + float3 endpoint952Unq = Unquantize9(endpoint950 + endpoint952); + float3 endpoint953Unq = Unquantize9(endpoint950 + endpoint953); + + float msle76 = 0.0f; + float msle95 = 0.0f; + for (uint i = 0u; i < 16u; ++i) { + uint paletteID = Pattern(pattern, i); + + float3 tmp760Unq = paletteID == 0u ? endpoint760Unq : endpoint762Unq; + float3 tmp761Unq = paletteID == 0u ? endpoint761Unq : endpoint763Unq; + float3 tmp950Unq = paletteID == 0u ? endpoint950Unq : endpoint952Unq; + float3 tmp951Unq = paletteID == 0u ? endpoint951Unq : endpoint953Unq; + + float weight = floor((indices[i] * 64.0f) / 7.0f + 0.5f); + float3 texelUnc76 = FinishUnquantize(tmp760Unq, tmp761Unq, weight); + float3 texelUnc95 = FinishUnquantize(tmp950Unq, tmp951Unq, weight); + + msle76 += CalcMSLE(texels[i], texelUnc76); + msle95 += CalcMSLE(texels[i], texelUnc95); + } + + SignExtend(endpoint761, 0x1F, 0x20); + SignExtend(endpoint762, 0x1F, 0x20); + SignExtend(endpoint763, 0x1F, 0x20); + + SignExtend(endpoint951, 0xF, 0x10); + SignExtend(endpoint952, 0xF, 0x10); + SignExtend(endpoint953, 0xF, 0x10); + + // encode block + float p2MSLE = min(msle76, msle95); + if (p2MSLE < blockMSLE) { + blockMSLE = p2MSLE; + block = uint4(0u, 0u, 0u, 0u); + + if (p2MSLE == msle76) { + // 7.6 + block.x = 0x1u; + block.x |= (uint(endpoint762.y) & 0x20u) >> 3u; + block.x |= (uint(endpoint763.y) & 0x10u) >> 1u; + block.x |= (uint(endpoint763.y) & 0x20u) >> 1u; + block.x |= uint(endpoint760.x) << 5u; + block.x |= (uint(endpoint763.z) & 0x01u) << 12u; + block.x |= (uint(endpoint763.z) & 0x02u) << 12u; + block.x |= (uint(endpoint762.z) & 0x10u) << 10u; + block.x |= uint(endpoint760.y) << 15u; + block.x |= (uint(endpoint762.z) 
& 0x20u) << 17u; + block.x |= (uint(endpoint763.z) & 0x04u) << 21u; + block.x |= (uint(endpoint762.y) & 0x10u) << 20u; + block.x |= uint(endpoint760.z) << 25u; + block.y |= (uint(endpoint763.z) & 0x08u) >> 3u; + block.y |= (uint(endpoint763.z) & 0x20u) >> 4u; + block.y |= (uint(endpoint763.z) & 0x10u) >> 2u; + block.y |= uint(endpoint761.x) << 3u; + block.y |= (uint(endpoint762.y) & 0x0Fu) << 9u; + block.y |= uint(endpoint761.y) << 13u; + block.y |= (uint(endpoint763.y) & 0x0Fu) << 19u; + block.y |= uint(endpoint761.z) << 23u; + block.y |= (uint(endpoint762.z) & 0x07u) << 29u; + block.z |= (uint(endpoint762.z) & 0x08u) >> 3u; + block.z |= uint(endpoint762.x) << 1u; + block.z |= uint(endpoint763.x) << 7u; + } else { + // 9.5 + block.x = 0xEu; + block.x |= uint(endpoint950.x) << 5u; + block.x |= (uint(endpoint952.z) & 0x10u) << 10u; + block.x |= uint(endpoint950.y) << 15u; + block.x |= (uint(endpoint952.y) & 0x10u) << 20u; + block.x |= uint(endpoint950.z) << 25u; + block.y |= uint(endpoint950.z) >> 7u; + block.y |= (uint(endpoint953.z) & 0x10u) >> 2u; + block.y |= uint(endpoint951.x) << 3u; + block.y |= (uint(endpoint953.y) & 0x10u) << 4u; + block.y |= (uint(endpoint952.y) & 0x0Fu) << 9u; + block.y |= uint(endpoint951.y) << 13u; + block.y |= (uint(endpoint953.z) & 0x01u) << 18u; + block.y |= (uint(endpoint953.y) & 0x0Fu) << 19u; + block.y |= uint(endpoint951.z) << 23u; + block.y |= (uint(endpoint953.z) & 0x02u) << 27u; + block.y |= uint(endpoint952.z) << 29u; + block.z |= (uint(endpoint952.z) & 0x08u) >> 3u; + block.z |= uint(endpoint952.x) << 1u; + block.z |= (uint(endpoint953.z) & 0x04u) << 4u; + block.z |= uint(endpoint953.x) << 7u; + block.z |= (uint(endpoint953.z) & 0x08u) << 9u; + } + + block.z |= pattern << 13u; + uint blockFixupID = PatternFixupID(pattern); + if (blockFixupID == 15u) { + block.z |= indices[0] << 18u; + block.z |= indices[1] << 20u; + block.z |= indices[2] << 23u; + block.z |= indices[3] << 26u; + block.z |= indices[4] << 29u; + block.w |= 
indices[5] << 0u; + block.w |= indices[6] << 3u; + block.w |= indices[7] << 6u; + block.w |= indices[8] << 9u; + block.w |= indices[9] << 12u; + block.w |= indices[10] << 15u; + block.w |= indices[11] << 18u; + block.w |= indices[12] << 21u; + block.w |= indices[13] << 24u; + block.w |= indices[14] << 27u; + block.w |= indices[15] << 30u; + } else if (blockFixupID == 2u) { + block.z |= indices[0] << 18u; + block.z |= indices[1] << 20u; + block.z |= indices[2] << 23u; + block.z |= indices[3] << 25u; + block.z |= indices[4] << 28u; + block.z |= indices[5] << 31u; + block.w |= indices[5] >> 1u; + block.w |= indices[6] << 2u; + block.w |= indices[7] << 5u; + block.w |= indices[8] << 8u; + block.w |= indices[9] << 11u; + block.w |= indices[10] << 14u; + block.w |= indices[11] << 17u; + block.w |= indices[12] << 20u; + block.w |= indices[13] << 23u; + block.w |= indices[14] << 26u; + block.w |= indices[15] << 29u; + } else { + block.z |= indices[0] << 18u; + block.z |= indices[1] << 20u; + block.z |= indices[2] << 23u; + block.z |= indices[3] << 26u; + block.z |= indices[4] << 29u; + block.w |= indices[5] << 0u; + block.w |= indices[6] << 3u; + block.w |= indices[7] << 6u; + block.w |= indices[8] << 9u; + block.w |= indices[9] << 11u; + block.w |= indices[10] << 14u; + block.w |= indices[11] << 17u; + block.w |= indices[12] << 20u; + block.w |= indices[13] << 23u; + block.w |= indices[14] << 26u; + block.w |= indices[15] << 29u; + } + } +} + +layout(local_size_x = 8, + local_size_y = 8, + local_size_z = 1) in; + +void main() { + // gather texels for current 4x4 block + // 0 1 2 3 + // 4 5 6 7 + // 8 9 10 11 + // 12 13 14 15 + float2 uv = gl_GlobalInvocationID.xy * params.p_textureSizeRcp * 4.0f + params.p_textureSizeRcp; + float2 block0UV = uv; + float2 block1UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 0.0f); + float2 block2UV = uv + float2(0.0f, 2.0f * params.p_textureSizeRcp.y); + float2 block3UV = uv + float2(2.0f * params.p_textureSizeRcp.x, 2.0f * 
params.p_textureSizeRcp.y); + float4 block0X = OGRE_GatherRed(srcTexture, pointSampler, block0UV); + float4 block1X = OGRE_GatherRed(srcTexture, pointSampler, block1UV); + float4 block2X = OGRE_GatherRed(srcTexture, pointSampler, block2UV); + float4 block3X = OGRE_GatherRed(srcTexture, pointSampler, block3UV); + float4 block0Y = OGRE_GatherGreen(srcTexture, pointSampler, block0UV); + float4 block1Y = OGRE_GatherGreen(srcTexture, pointSampler, block1UV); + float4 block2Y = OGRE_GatherGreen(srcTexture, pointSampler, block2UV); + float4 block3Y = OGRE_GatherGreen(srcTexture, pointSampler, block3UV); + float4 block0Z = OGRE_GatherBlue(srcTexture, pointSampler, block0UV); + float4 block1Z = OGRE_GatherBlue(srcTexture, pointSampler, block1UV); + float4 block2Z = OGRE_GatherBlue(srcTexture, pointSampler, block2UV); + float4 block3Z = OGRE_GatherBlue(srcTexture, pointSampler, block3UV); + + float3 texels[16]; + texels[0] = float3(block0X.w, block0Y.w, block0Z.w); + texels[1] = float3(block0X.z, block0Y.z, block0Z.z); + texels[2] = float3(block1X.w, block1Y.w, block1Z.w); + texels[3] = float3(block1X.z, block1Y.z, block1Z.z); + texels[4] = float3(block0X.x, block0Y.x, block0Z.x); + texels[5] = float3(block0X.y, block0Y.y, block0Z.y); + texels[6] = float3(block1X.x, block1Y.x, block1Z.x); + texels[7] = float3(block1X.y, block1Y.y, block1Z.y); + texels[8] = float3(block2X.w, block2Y.w, block2Z.w); + texels[9] = float3(block2X.z, block2Y.z, block2Z.z); + texels[10] = float3(block3X.w, block3Y.w, block3Z.w); + texels[11] = float3(block3X.z, block3Y.z, block3Z.z); + texels[12] = float3(block2X.x, block2Y.x, block2Z.x); + texels[13] = float3(block2X.y, block2Y.y, block2Z.y); + texels[14] = float3(block3X.x, block3Y.x, block3Z.x); + texels[15] = float3(block3X.y, block3Y.y, block3Z.y); + + uint4 block = uint4(0u, 0u, 0u, 0u); + float blockMSLE = 0.0f; + + EncodeP1(block, blockMSLE, texels); + +#ifdef QUALITY + float bestScore = EvaluateP2Pattern(0, texels); + uint bestPattern = 0; 
+
+	for (uint i = 1u; i < 32u; ++i) {
+		float score = EvaluateP2Pattern(i, texels);
+
+		if (score < bestScore) {
+			bestPattern = i;
+			bestScore = score;
+		}
+	}
+
+	EncodeP2Pattern(block, blockMSLE, bestPattern, texels);
+#endif
+
+	imageStore(dstTexture, int2(gl_GlobalInvocationID.xy), block);
+}
diff --git a/modules/betsy/config.py b/modules/betsy/config.py
new file mode 100644
index 000000000000..eb565b85b903
--- /dev/null
+++ b/modules/betsy/config.py
@@ -0,0 +1,6 @@
+def can_build(env, platform):
+    return env.editor_build
+
+
+def configure(env):
+    pass
diff --git a/modules/betsy/image_compress_betsy.cpp b/modules/betsy/image_compress_betsy.cpp
new file mode 100644
index 000000000000..6a0862e72907
--- /dev/null
+++ b/modules/betsy/image_compress_betsy.cpp
@@ -0,0 +1,368 @@
+/**************************************************************************/
+/*  image_compress_betsy.cpp                                              */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "image_compress_betsy.h"
+
+#include "servers/rendering/rendering_device_binds.h"
+#include "servers/rendering/rendering_server_default.h"
+
+#if defined(VULKAN_ENABLED)
+#include "drivers/vulkan/rendering_context_driver_vulkan.h"
+#endif
+
+#include "bc6h.glsl.gen.h"
+
+// Push constant block handed to the BC6H compute shader.
+struct BC6PushConstant {
+	float sizeX; // Set to 1.0 / mip width.
+	float sizeY; // Set to 1.0 / mip height.
+	uint32_t padding[2]; // Always written as zero.
+};
+
+// Rounds `n` up to the nearest multiple of `m`. A value that is already a multiple is returned
+// unchanged, so dividing the result by `m` yields ceil(n / m).
+static int get_next_multiple(int n, int m) {
+	return ((n + m - 1) / m) * m;
+}
+
+// Returns true if any component of a half-float or float image has its sign bit set together with
+// at least one other bit (negative zero is ignored). Every other format reports false.
+static bool is_image_signed(const Image *r_img) {
+	if (r_img->get_format() >= Image::FORMAT_RH && r_img->get_format() <= Image::FORMAT_RGBAH) {
+		const uint16_t *img_data = reinterpret_cast<const uint16_t *>(r_img->ptr());
+		const uint64_t img_size = r_img->get_data_size() / 2;
+
+		for (uint64_t i = 0; i < img_size; i++) {
+			if ((img_data[i] & 0x8000) != 0 && (img_data[i] & 0x7fff) != 0) {
+				return true;
+			}
+		}
+
+	} else if (r_img->get_format() >= Image::FORMAT_RF && r_img->get_format() <= Image::FORMAT_RGBAF) {
+		const uint32_t *img_data = reinterpret_cast<const uint32_t *>(r_img->ptr());
+		const uint64_t img_size = r_img->get_data_size() / 4;
+
+		for (uint64_t i = 0; i < img_size; i++) {
+			if ((img_data[i] & 0x80000000) != 0 && (img_data[i] & 0x7fffffff) != 0) {
+				return true;
+			}
+		}
+	}
+
+	return false;
+}
+
+// Destroys the local RenderingDevice and, when one was created by hand, its context driver.
+static void free_local_rd(RenderingDevice *p_rd, RenderingContextDriver *p_rcd) {
+	memdelete(p_rd);
+	if (p_rcd != nullptr) {
+		memdelete(p_rcd);
+	}
+}
+
+// Compresses `r_img` in place on the GPU with the Betsy compute shaders.
+// Only uncompressed HDR formats (FORMAT_RF to FORMAT_RGBE9995) are accepted. For BC6 the output is
+// FORMAT_BPTC_RGBF when is_image_signed() reports negative values and FORMAT_BPTC_RGBFU otherwise.
+// Returns OK only if the image data was replaced; any other value means no compressed data was
+// written, so the caller can fall back to a different encoder.
+Error _compress_betsy(BetsyFormat p_format, Image *r_img) {
+	uint64_t start_time = OS::get_singleton()->get_ticks_msec();
+
+	if (r_img->is_compressed()) {
+		return ERR_INVALID_DATA;
+	}
+
+	ERR_FAIL_COND_V_MSG(r_img->get_format() < Image::FORMAT_RF || r_img->get_format() > Image::FORMAT_RGBE9995, ERR_INVALID_DATA, "Image is not an HDR image.");
+
+	Error err = OK;
+
+	// Create local RD.
+	RenderingContextDriver *rcd = nullptr;
+	RenderingDevice *rd = RenderingServer::get_singleton()->create_local_rendering_device();
+
+	if (rd == nullptr) {
+#if defined(RD_ENABLED)
+#if defined(VULKAN_ENABLED)
+		rcd = memnew(RenderingContextDriverVulkan);
+		rd = memnew(RenderingDevice);
+#endif
+#endif
+		if (rcd != nullptr && rd != nullptr) {
+			err = rcd->initialize();
+			if (err == OK) {
+				err = rd->initialize(rcd);
+			}
+
+			if (err != OK) {
+				memdelete(rd);
+				memdelete(rcd);
+				rd = nullptr;
+				rcd = nullptr;
+			}
+		}
+	}
+
+	// `err` is still OK when no driver was available to try, so never report success without a device.
+	ERR_FAIL_NULL_V_MSG(rd, err == OK ? ERR_UNAVAILABLE : err, "Unable to create a local RenderingDevice.");
+
+	Ref<RDShaderFile> compute_shader;
+	compute_shader.instantiate();
+
+	// Destination format.
+	Image::Format dest_format = Image::FORMAT_MAX;
+
+	String version = "";
+
+	switch (p_format) {
+		case BETSY_FORMAT_BC6: {
+			err = compute_shader->parse_versions_from_text(bc6h_shader_glsl);
+
+			if (is_image_signed(r_img)) {
+				dest_format = Image::FORMAT_BPTC_RGBF;
+				version = "signed";
+			} else {
+				dest_format = Image::FORMAT_BPTC_RGBFU;
+				version = "unsigned";
+			}
+
+		} break;
+
+		default:
+			err = ERR_INVALID_PARAMETER;
+			break;
+	}
+
+	if (err != OK) {
+		free_local_rd(rd, rcd);
+		return err;
+	}
+
+	// Compile the shader, return early if invalid.
+	RID shader = rd->shader_create_from_spirv(compute_shader->get_spirv_stages(version));
+
+	if (shader.is_null()) {
+		free_local_rd(rd, rcd);
+		// `err` is OK here; returning it would make the caller treat the untouched image as compressed.
+		return ERR_CANT_CREATE;
+	}
+
+	RID pipeline = rd->compute_pipeline_create(shader);
+
+	if (pipeline.is_null()) {
+		rd->free(shader);
+		free_local_rd(rd, rcd);
+		return ERR_CANT_CREATE;
+	}
+
+	// src_texture format information.
+	RD::TextureFormat src_texture_format;
+	{
+		src_texture_format.array_layers = 1;
+		src_texture_format.depth = 1;
+		src_texture_format.mipmaps = 1;
+		src_texture_format.texture_type = RD::TEXTURE_TYPE_2D;
+		src_texture_format.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
+	}
+
+	switch (r_img->get_format()) {
+		case Image::FORMAT_RH:
+			src_texture_format.format = RD::DATA_FORMAT_R16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGH:
+			src_texture_format.format = RD::DATA_FORMAT_R16G16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBH:
+			r_img->convert(Image::FORMAT_RGBAH);
+			src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBAH:
+			src_texture_format.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
+			break;
+
+		case Image::FORMAT_RF:
+			src_texture_format.format = RD::DATA_FORMAT_R32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGF:
+			src_texture_format.format = RD::DATA_FORMAT_R32G32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBF:
+			r_img->convert(Image::FORMAT_RGBAF);
+			src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBAF:
+			src_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_SFLOAT;
+			break;
+
+		case Image::FORMAT_RGBE9995:
+			src_texture_format.format = RD::DATA_FORMAT_E5B9G9R9_UFLOAT_PACK32;
+			break;
+
+		default: {
+			rd->free(shader);
+			free_local_rd(rd, rcd);
+			// The format check at the top should make this unreachable, but never report success from here.
+			return ERR_INVALID_DATA;
+		}
+	}
+
+	// Create the sampler state.
+	RD::SamplerState src_sampler_state;
+	{
+		src_sampler_state.repeat_u = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+		src_sampler_state.repeat_v = RD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE;
+		src_sampler_state.mag_filter = RD::SAMPLER_FILTER_NEAREST;
+		src_sampler_state.min_filter = RD::SAMPLER_FILTER_NEAREST;
+		src_sampler_state.mip_filter = RD::SAMPLER_FILTER_NEAREST;
+	}
+
+	RID src_sampler = rd->sampler_create(src_sampler_state);
+
+	// For the destination format just copy the source format and change the usage bits.
+	RD::TextureFormat dst_texture_format = src_texture_format;
+	dst_texture_format.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_CAN_UPDATE_BIT;
+	dst_texture_format.format = RD::DATA_FORMAT_R32G32B32A32_UINT;
+
+	const int mip_count = r_img->get_mipmap_count() + 1;
+
+	// Container for the compressed data.
+	Vector<uint8_t> dst_data;
+	dst_data.resize(Image::get_image_data_size(r_img->get_width(), r_img->get_height(), dest_format, r_img->has_mipmaps()));
+	uint8_t *dst_data_ptr = dst_data.ptrw();
+
+	Vector<Vector<uint8_t>> src_images;
+	src_images.push_back(Vector<uint8_t>());
+	Vector<uint8_t> *src_image_ptr = src_images.ptrw();
+
+	// Compress each mipmap.
+	for (int i = 0; i < mip_count; i++) {
+		int64_t ofs, size;
+		int width, height;
+		r_img->get_mipmap_offset_size_and_dimensions(i, ofs, size, width, height);
+
+		// Set the source texture width and size.
+		src_texture_format.height = height;
+		src_texture_format.width = width;
+
+		// Set the destination texture width and size.
+		dst_texture_format.height = (height + 3) >> 2;
+		dst_texture_format.width = (width + 3) >> 2;
+
+		// Create a buffer filled with the source mip layer data.
+		src_image_ptr[0].resize(size);
+		memcpy(src_image_ptr[0].ptrw(), r_img->ptr() + ofs, size);
+
+		// Create the textures on the GPU.
+		RID src_texture = rd->texture_create(src_texture_format, RD::TextureView(), src_images);
+		RID dst_texture = rd->texture_create(dst_texture_format, RD::TextureView());
+
+		if (dest_format == Image::FORMAT_BPTC_RGBFU || dest_format == Image::FORMAT_BPTC_RGBF) {
+			BC6PushConstant push_constant;
+			push_constant.sizeX = 1.0f / width;
+			push_constant.sizeY = 1.0f / height;
+			push_constant.padding[0] = 0;
+			push_constant.padding[1] = 0;
+
+			Vector<RD::Uniform> uniforms;
+			{
+				{
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE;
+					u.binding = 0;
+					u.append_id(src_sampler);
+					u.append_id(src_texture);
+					uniforms.push_back(u);
+				}
+				{
+					RD::Uniform u;
+					u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
+					u.binding = 1;
+					u.append_id(dst_texture);
+					uniforms.push_back(u);
+				}
+			}
+
+			RID uniform_set = rd->uniform_set_create(uniforms, shader, 0);
+			RD::ComputeListID compute_list = rd->compute_list_begin();
+
+			rd->compute_list_bind_compute_pipeline(compute_list, pipeline);
+			rd->compute_list_bind_uniform_set(compute_list, uniform_set, 0);
+			rd->compute_list_set_push_constant(compute_list, &push_constant, sizeof(BC6PushConstant));
+			// get_next_multiple(x, 32) / 32 is ceil(x / 32) workgroups along each axis.
+			rd->compute_list_dispatch(compute_list, get_next_multiple(width, 32) / 32, get_next_multiple(height, 32) / 32, 1);
+			rd->compute_list_end();
+		}
+
+		rd->submit();
+		rd->sync();
+
+		// Copy data from the GPU to the buffer.
+		const Vector<uint8_t> texture_data = rd->texture_get_data(dst_texture, 0);
+		int64_t dst_ofs = Image::get_image_mipmap_offset(r_img->get_width(), r_img->get_height(), dest_format, i);
+
+		memcpy(dst_data_ptr + dst_ofs, texture_data.ptr(), texture_data.size());
+
+		// Free the source and dest texture.
+		rd->free(dst_texture);
+		rd->free(src_texture);
+	}
+
+	src_images.clear();
+
+	// Set the compressed data to the image.
+	r_img->set_data(r_img->get_width(), r_img->get_height(), r_img->has_mipmaps(), dest_format, dst_data);
+
+	// Free the shader (dependencies will be cleared automatically).
+	rd->free(src_sampler);
+	rd->free(shader);
+
+	free_local_rd(rd, rcd);
+
+	print_verbose(vformat("Betsy: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time));
+
+	return OK;
+}
+
+// BPTC hook for Image: encodes HDR formats as BC6 and returns ERR_UNAVAILABLE for everything else.
+// `p_channels` is currently unused.
+Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels) {
+	Image::Format format = r_img->get_format();
+
+	if (format >= Image::FORMAT_RF && format <= Image::FORMAT_RGBE9995) {
+		return _compress_betsy(BETSY_FORMAT_BC6, r_img);
+	}
+
+	return ERR_UNAVAILABLE;
+}
diff --git a/modules/betsy/image_compress_betsy.h b/modules/betsy/image_compress_betsy.h
new file mode 100644
index 000000000000..a64e586c76c5
--- /dev/null
+++ b/modules/betsy/image_compress_betsy.h
@@ -0,0 +1,44 @@
+/**************************************************************************/
+/*  image_compress_betsy.h                                                */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.
*/
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#ifndef IMAGE_COMPRESS_BETSY_H
+#define IMAGE_COMPRESS_BETSY_H
+
+#include "core/io/image.h"
+
+enum BetsyFormat {
+	BETSY_FORMAT_BC6,
+};
+
+Error _compress_betsy(BetsyFormat p_format, Image *r_img);
+
+Error _betsy_compress_bptc(Image *r_img, Image::UsedChannels p_channels);
+
+#endif // IMAGE_COMPRESS_BETSY_H
diff --git a/modules/betsy/register_types.cpp b/modules/betsy/register_types.cpp
new file mode 100644
index 000000000000..019099e67c6c
--- /dev/null
+++ b/modules/betsy/register_types.cpp
@@ -0,0 +1,48 @@
+/**************************************************************************/
+/*  register_types.cpp                                                    */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "register_types.h"
+
+#include "image_compress_betsy.h"
+
+// Installs the Betsy encoder as Image's RenderingDevice-based BPTC compression hook.
+void initialize_betsy_module(ModuleInitializationLevel p_level) {
+	// Only the scene initialization level installs the hook.
+	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
+		Image::_image_compress_bptc_rd_func = _betsy_compress_bptc;
+	}
+}
+
+// Teardown counterpart of initialize_betsy_module(); it performs no work at any level.
+void uninitialize_betsy_module(ModuleInitializationLevel p_level) {
+	if (p_level == MODULE_INITIALIZATION_LEVEL_SCENE) {
+		// Nothing to undo: the hook set during initialization is left untouched.
+	}
+}
diff --git a/modules/betsy/register_types.h b/modules/betsy/register_types.h
new file mode 100644
index 000000000000..0ce6c553b666
--- /dev/null
+++ b/modules/betsy/register_types.h
@@ -0,0 +1,39 @@
+/**************************************************************************/
+/*  register_types.h                                                      */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.
*/ +/* */ +/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */ +/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */ +/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */ +/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */ +/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */ +/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */ +/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */ +/**************************************************************************/ + +#ifndef BETSY_REGISTER_TYPES_H +#define BETSY_REGISTER_TYPES_H + +#include "modules/register_module_types.h" + +void initialize_betsy_module(ModuleInitializationLevel p_level); +void uninitialize_betsy_module(ModuleInitializationLevel p_level); + +#endif // BETSY_REGISTER_TYPES_H diff --git a/modules/cvtt/image_compress_cvtt.cpp b/modules/cvtt/image_compress_cvtt.cpp index 4938d8bff59b..ccc7dfd7ce9e 100644 --- a/modules/cvtt/image_compress_cvtt.cpp +++ b/modules/cvtt/image_compress_cvtt.cpp @@ -142,9 +142,12 @@ static void _digest_job_queue(void *p_job_queue, uint32_t p_index) { } void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) { + uint64_t start_time = OS::get_singleton()->get_ticks_msec(); + if (p_image->is_compressed()) { return; //do not compress, already compressed } + int w = p_image->get_width(); int h = p_image->get_height(); @@ -250,6 +253,8 @@ void image_compress_cvtt(Image *p_image, Image::UsedChannels p_channels) { WorkerThreadPool::get_singleton()->wait_for_group_task_completion(group_task); p_image->set_data(p_image->get_width(), p_image->get_height(), p_image->has_mipmaps(), target_format, data); + + print_verbose(vformat("CVTT: Encoding took %d ms.", OS::get_singleton()->get_ticks_msec() - start_time)); } void image_decompress_cvtt(Image *p_image) { diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 
9fc67b04b1d7..3fe2cb2ed98b 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -3518,6 +3518,7 @@ void RenderingServer::init() { // See `const bool can_s3tc_bptc` in the resource importer. GLOBAL_DEF_RST("rendering/textures/vram_compression/import_s3tc_bptc", false); GLOBAL_DEF_RST("rendering/textures/vram_compression/import_etc2_astc", false); + GLOBAL_DEF("rendering/textures/vram_compression/compress_with_gpu", true); GLOBAL_DEF("rendering/textures/lossless_compression/force_png", false); diff --git a/thirdparty/README.md b/thirdparty/README.md index 47618d675bdd..624608e2c7e5 100644 --- a/thirdparty/README.md +++ b/thirdparty/README.md @@ -70,6 +70,17 @@ Files extracted from upstream source: Applied upstream PR https://github.com/BinomialLLC/basis_universal/pull/344 to fix build with our own copy of zstd (patch in `patches`). +## betsy + +- Upstream: https://github.com/darksylinc/betsy +- Version: git (cc723dcae9a6783ae572f64d12a90d60ef8d631a, 2022) +- License: MIT + +Files extracted from upstream source: + +- `bc6h.glsl`, `CrossPlatformSettings_piece_all.glsl` and `UavCrossPlatform_piece_all.glsl`. +- `LICENSE.md` + ## brotli diff --git a/thirdparty/betsy/LICENSE.md b/thirdparty/betsy/LICENSE.md new file mode 100644 index 000000000000..66fbf5a96bb0 --- /dev/null +++ b/thirdparty/betsy/LICENSE.md @@ -0,0 +1,18 @@ +Copyright 2020-2022 Matias N. Goldberg + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +This software uses code from: + +* [GPURealTimeBC6H](https://github.com/knarkowicz/GPURealTimeBC6H), under public domain. Modifications by Matias N. Goldberg +* [rg-etc1](https://github.com/richgel999/rg-etc1/), Copyright (c) 2012 Rich Geldreich, zlib license. Extensive modifications by Matias N. Goldberg to adapt it as a compute shader +* [stb_dxt](https://github.com/nothings/stb/blob/master/stb_dxt.h), under dual-license: A. MIT License +Copyright (c) 2017 Sean Barrett, B. Public Domain (www.unlicense.org). Original by fabian "ryg" giesen - ported to C by stb. Modifications by Matias N. Goldberg to adapt it as a compute shader +* EAC loosely inspired on [etc2_encoder](https://github.com/titilambert/packaging-efl/blob/master/src/static_libs/rg_etc/etc2_encoder.c), Copyright (C) 2014 Jean-Philippe ANDRE, 2-clause BSD license +* ETC2 T & H modes based on [etc2_encoder](https://github.com/titilambert/packaging-efl/blob/master/src/static_libs/rg_etc/etc2_encoder.c), Copyright (C) 2014 Jean-Philippe ANDRE, 2-clause BSD license. A couple minor bugfixes applied by Matias N. Goldberg. Modifications made by Matias N. Goldberg to adapt it as a compute shader +* ETC2 P very loosely based on [etc2_encoder](https://github.com/titilambert/packaging-efl/blob/master/src/static_libs/rg_etc/etc2_encoder.c), Copyright (C) 2014 Jean-Philippe ANDRE, 2-clause BSD license. Considerable rewrite by Matias N. Goldberg to enhance its quality.