From 2fbaf7501e73169026ecd5dbf4280c7d6cba4c44 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Wed, 26 Apr 2023 18:52:51 +0200 Subject: [PATCH] Add high-precision GPU trilinear interpolation for 3D LUTs. This new code path can be enabled by disabling the new default-enabled OPTIMIZATION_NATIVE_GPU_TRILINEAR optimization flag. The existing code path used the GPU's native trilinear texture interpolation function, which, although faster, quantized the lookup coordinates which could cause banding. That's still the default, but full-precision trilinear interpolation can optionally be used instead. Signed-off-by: Nathan Vegdahl --- include/OpenColorIO/OpenColorTypes.h | 8 ++ src/OpenColorIO/GPUProcessor.cpp | 5 +- src/OpenColorIO/GPUProcessor.h | 1 + src/OpenColorIO/Op.h | 2 +- src/OpenColorIO/ops/cdl/CDLOp.cpp | 4 +- src/OpenColorIO/ops/exponent/ExponentOp.cpp | 4 +- .../exposurecontrast/ExposureContrastOp.cpp | 4 +- .../ops/fixedfunction/FixedFunctionOp.cpp | 4 +- src/OpenColorIO/ops/gamma/GammaOp.cpp | 4 +- .../ops/gradingprimary/GradingPrimaryOp.cpp | 4 +- .../ops/gradingrgbcurve/GradingRGBCurveOp.cpp | 4 +- .../ops/gradingtone/GradingToneOp.cpp | 4 +- src/OpenColorIO/ops/log/LogOp.cpp | 4 +- src/OpenColorIO/ops/lut1d/Lut1DOp.cpp | 4 +- src/OpenColorIO/ops/lut3d/Lut3DOp.cpp | 6 +- src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp | 86 +++++++++++++++---- src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h | 2 +- src/OpenColorIO/ops/matrix/MatrixOp.cpp | 4 +- src/OpenColorIO/ops/noop/NoOps.cpp | 6 +- src/OpenColorIO/ops/range/RangeOp.cpp | 4 +- 20 files changed, 116 insertions(+), 48 deletions(-) diff --git a/include/OpenColorIO/OpenColorTypes.h b/include/OpenColorIO/OpenColorTypes.h index c400568eaa..b5729d41a0 100644 --- a/include/OpenColorIO/OpenColorTypes.h +++ b/include/OpenColorIO/OpenColorTypes.h @@ -619,6 +619,13 @@ enum OptimizationFlags : unsigned long */ OPTIMIZATION_NO_DYNAMIC_PROPERTIES = 0x10000000, + /** + * For GPU processor, use native trilinear interpolation for 3D LUTs. This is faster, + * but on many GPUs also lower precision. With low-resolution LUTs, LUTs with large + * extents, or LUTs applied in a linear color space this can sometimes cause color banding. + */ + OPTIMIZATION_NATIVE_GPU_TRILINEAR = 0x20000000, + /// Apply all possible optimizations. OPTIMIZATION_ALL = 0xFFFFFFFF, @@ -645,6 +652,7 @@ enum OptimizationFlags : unsigned long OPTIMIZATION_COMP_LUT1D | OPTIMIZATION_LUT_INV_FAST | OPTIMIZATION_FAST_LOG_EXP_POW | + OPTIMIZATION_NATIVE_GPU_TRILINEAR | OPTIMIZATION_COMP_SEPARABLE_PREFIX), OPTIMIZATION_GOOD = OPTIMIZATION_VERY_GOOD | OPTIMIZATION_COMP_LUT3D, diff --git a/src/OpenColorIO/GPUProcessor.cpp b/src/OpenColorIO/GPUProcessor.cpp index e463903c08..9d86b2b770 100644 --- a/src/OpenColorIO/GPUProcessor.cpp +++ b/src/OpenColorIO/GPUProcessor.cpp @@ -85,6 +85,9 @@ void GPUProcessor::Impl::finalize(const OpRcPtrVec & rawOps, OptimizationFlags o // Is NoOp ? m_isNoOp = m_ops.isNoOp(); + // Store optimization flags for use when generating shader code. + m_oFlags = oFlags; + // Does the color processing introduce crosstalk between the pixel channels? m_hasChannelCrosstalk = m_ops.hasChannelCrosstalk(); @@ -104,7 +107,7 @@ void GPUProcessor::Impl::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCrea // Create the shader program information. for(const auto & op : m_ops) { - op->extractGpuShaderInfo(shaderCreator); + op->extractGpuShaderInfo(shaderCreator, m_oFlags); } WriteShaderHeader(shaderCreator); diff --git a/src/OpenColorIO/GPUProcessor.h b/src/OpenColorIO/GPUProcessor.h index 30039a1cd2..5ea1265730 100644 --- a/src/OpenColorIO/GPUProcessor.h +++ b/src/OpenColorIO/GPUProcessor.h @@ -41,6 +41,7 @@ class GPUProcessor::Impl OpRcPtrVec m_ops; bool m_isNoOp = false; bool m_hasChannelCrosstalk = true; + OptimizationFlags m_oFlags = OPTIMIZATION_DEFAULT; std::string m_cacheID; mutable Mutex m_mutex; }; diff --git a/src/OpenColorIO/Op.h b/src/OpenColorIO/Op.h index d48fe512db..f0c4149f20 100644 --- a/src/OpenColorIO/Op.h +++ b/src/OpenColorIO/Op.h @@ -246,7 +246,7 @@ class Op virtual bool supportedByLegacyShader() const { return true; } // Create & add the gpu shader information needed by the op. Op has to be finalized. - virtual void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const = 0; + virtual void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags oFlags = OPTIMIZATION_DEFAULT) const = 0; virtual bool isDynamic() const; virtual bool hasDynamicProperty(DynamicPropertyType type) const; diff --git a/src/OpenColorIO/ops/cdl/CDLOp.cpp b/src/OpenColorIO/ops/cdl/CDLOp.cpp index bd0511ea43..29d841a4d2 100644 --- a/src/OpenColorIO/ops/cdl/CDLOp.cpp +++ b/src/OpenColorIO/ops/cdl/CDLOp.cpp @@ -50,7 +50,7 @@ class CDLOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstCDLOpDataRcPtr cdlData() const { return DynamicPtrCast(data()); } @@ -133,7 +133,7 @@ ConstOpCPURcPtr CDLOp::getCPUOp(bool fastLogExpPow) const return GetCDLCPURenderer(data, fastLogExpPow); } -void CDLOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void CDLOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstCDLOpDataRcPtr data = cdlData(); GetCDLGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/exponent/ExponentOp.cpp b/src/OpenColorIO/ops/exponent/ExponentOp.cpp index 41b95992c4..0e667db23f 100644 --- a/src/OpenColorIO/ops/exponent/ExponentOp.cpp +++ b/src/OpenColorIO/ops/exponent/ExponentOp.cpp @@ -150,7 +150,7 @@ class ExponentOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstExponentOpDataRcPtr expData() const { return DynamicPtrCast(data()); } @@ -251,7 +251,7 @@ ConstOpCPURcPtr ExponentOp::getCPUOp(bool /*fastLogExpPow*/) const return std::make_shared(expData()); } -void ExponentOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void ExponentOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { GpuShaderText ss(shaderCreator->getLanguage()); ss.indent(); diff --git a/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp b/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp index fc5d1cd4e4..031b96fd02 100644 --- a/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp +++ b/src/OpenColorIO/ops/exposurecontrast/ExposureContrastOp.cpp @@ -49,7 +49,7 @@ class ExposureContrastOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstExposureContrastOpDataRcPtr ecData() const @@ -135,7 +135,7 @@ ConstOpCPURcPtr ExposureContrastOp::getCPUOp(bool /*fastLogExpPow*/) const return GetExposureContrastCPURenderer(ecOpData); } -void ExposureContrastOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void ExposureContrastOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstExposureContrastOpDataRcPtr ecOpData = ecData(); GetExposureContrastGPUShaderProgram(shaderCreator, ecOpData); diff --git a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp index ac672e1190..f85f4321c2 100644 --- a/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp +++ b/src/OpenColorIO/ops/fixedfunction/FixedFunctionOp.cpp @@ -45,7 +45,7 @@ class FixedFunctionOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstFixedFunctionOpDataRcPtr fnData() const { return DynamicPtrCast(data()); } @@ -125,7 +125,7 @@ ConstOpCPURcPtr FixedFunctionOp::getCPUOp(bool /*fastLogExpPow*/) const return GetFixedFunctionCPURenderer(data); } -void FixedFunctionOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void FixedFunctionOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstFixedFunctionOpDataRcPtr fnOpData = fnData(); GetFixedFunctionGPUShaderProgram(shaderCreator, fnOpData); diff --git a/src/OpenColorIO/ops/gamma/GammaOp.cpp b/src/OpenColorIO/ops/gamma/GammaOp.cpp index 9d6afccc7d..b1683c0ec7 100644 --- a/src/OpenColorIO/ops/gamma/GammaOp.cpp +++ b/src/OpenColorIO/ops/gamma/GammaOp.cpp @@ -44,7 +44,7 @@ class GammaOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGammaOpDataRcPtr gammaData() const { return DynamicPtrCast(data()); } @@ -123,7 +123,7 @@ ConstOpCPURcPtr GammaOp::getCPUOp(bool fastLogExpPow) const return GetGammaRenderer(data, fastLogExpPow); } -void GammaOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GammaOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGammaOpDataRcPtr data = gammaData(); GetGammaGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp b/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp index 5bb2f960bf..355c877167 100644 --- a/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp +++ b/src/OpenColorIO/ops/gradingprimary/GradingPrimaryOp.cpp @@ -52,7 +52,7 @@ class GradingPrimaryOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGradingPrimaryOpDataRcPtr primaryData() const @@ -190,7 +190,7 @@ ConstOpCPURcPtr GradingPrimaryOp::getCPUOp(bool /*fastLogExpPow*/) const return GetGradingPrimaryCPURenderer(data); } -void GradingPrimaryOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GradingPrimaryOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGradingPrimaryOpDataRcPtr data = primaryData(); GetGradingPrimaryGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp b/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp index 598ae28e67..69cfc37cec 100644 --- a/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp +++ b/src/OpenColorIO/ops/gradingrgbcurve/GradingRGBCurveOp.cpp @@ -52,7 +52,7 @@ class GradingRGBCurveOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGradingRGBCurveOpDataRcPtr rgbCurveData() const @@ -190,7 +190,7 @@ ConstOpCPURcPtr GradingRGBCurveOp::getCPUOp(bool /*fastLogExpPow*/) const return GetGradingRGBCurveCPURenderer(data); } -void GradingRGBCurveOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GradingRGBCurveOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGradingRGBCurveOpDataRcPtr data = rgbCurveData(); GetGradingRGBCurveGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp b/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp index 5968210e71..aeb24aa131 100644 --- a/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp +++ b/src/OpenColorIO/ops/gradingtone/GradingToneOp.cpp @@ -52,7 +52,7 @@ class GradingToneOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstGradingToneOpDataRcPtr toneData() const @@ -184,7 +184,7 @@ ConstOpCPURcPtr GradingToneOp::getCPUOp(bool /*fastLogExpPow*/) const return GetGradingToneCPURenderer(data); } -void GradingToneOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void GradingToneOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstGradingToneOpDataRcPtr data = toneData(); GetGradingToneGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/log/LogOp.cpp b/src/OpenColorIO/ops/log/LogOp.cpp index 8eeff9b3b2..7a1fc66221 100644 --- a/src/OpenColorIO/ops/log/LogOp.cpp +++ b/src/OpenColorIO/ops/log/LogOp.cpp @@ -44,7 +44,7 @@ class LogOp: public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstLogOpDataRcPtr logData() const { return DynamicPtrCast(data()); } @@ -110,7 +110,7 @@ ConstOpCPURcPtr LogOp::getCPUOp(bool fastLogExpPow) const return GetLogRenderer(data, fastLogExpPow); } -void LogOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void LogOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstLogOpDataRcPtr data = logData(); GetLogGPUShaderProgram(shaderCreator, data); diff --git a/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp b/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp index 412462414b..1fbfddfaa1 100644 --- a/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp +++ b/src/OpenColorIO/ops/lut1d/Lut1DOp.cpp @@ -52,7 +52,7 @@ class Lut1DOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; bool supportedByLegacyShader() const override { return false; } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; ConstLut1DOpDataRcPtr lut1DData() const { return DynamicPtrCast(data()); } Lut1DOpDataRcPtr lut1DData() { return DynamicPtrCast(data()); } @@ -154,7 +154,7 @@ ConstOpCPURcPtr Lut1DOp::getCPUOp(bool /*fastLogExpPow*/) const return GetLut1DRenderer(data, BIT_DEPTH_F32, BIT_DEPTH_F32); } -void Lut1DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void Lut1DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstLut1DOpDataRcPtr lutData = lut1DData(); if (lutData->getDirection() == TRANSFORM_DIR_INVERSE) diff --git a/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp b/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp index 4f0de76f4d..7ddfeb334f 100644 --- a/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp +++ b/src/OpenColorIO/ops/lut3d/Lut3DOp.cpp @@ -98,7 +98,7 @@ class Lut3DOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; bool supportedByLegacyShader() const override { return false; } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags oFlags) const override; protected: ConstLut3DOpDataRcPtr lut3DData() const @@ -200,7 +200,7 @@ ConstOpCPURcPtr Lut3DOp::getCPUOp(bool /*fastLogExpPow*/) const return GetLut3DRenderer(data); } -void Lut3DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void Lut3DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags oFlags) const { ConstLut3DOpDataRcPtr lutData = lut3DData(); if (lutData->getDirection() == TRANSFORM_DIR_INVERSE) @@ -216,7 +216,7 @@ void Lut3DOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const lutData = tmp; } - GetLut3DGPUShaderProgram(shaderCreator, lutData); + GetLut3DGPUShaderProgram(shaderCreator, lutData, oFlags); } } diff --git a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp index 786f52961c..5ef8c8c606 100644 --- a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp +++ b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.cpp @@ -14,7 +14,7 @@ namespace OCIO_NAMESPACE { -void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData) +void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData, OptimizationFlags oFlags) { if (shaderCreator->getLanguage() == LANGUAGE_OSL_1) @@ -32,9 +32,11 @@ void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DO std::string name(resName.str()); StringUtils::ReplaceInPlace(name, "__", "_"); + const bool use_high_precision = !HasFlag(oFlags, OPTIMIZATION_NATIVE_GPU_TRILINEAR); Interpolation samplerInterpolation = lutData->getConcreteInterpolation(); - // Enforce GL_NEAREST with shader-generated tetrahedral interpolation. - if (samplerInterpolation == INTERP_TETRAHEDRAL) + // Enforce GL_NEAREST with shader-generated tetrahedral interpolation + // or hand-rolled trilinear interpolation. + if (samplerInterpolation == INTERP_TETRAHEDRAL || use_high_precision) { samplerInterpolation = INTERP_NEAREST; } @@ -225,18 +227,72 @@ void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DO else { // Trilinear interpolation - // Use texture3d and GL_LINEAR and the GPU's built-in trilinear algorithm. - // Note that the fractional components are quantized to 8-bits on some - // hardware, which introduces significant error with small grid sizes. - - ss.newLine() << ss.float3Decl(name + "_coords") - << " = (" << shaderCreator->getPixelName() << ".zyx * " - << ss.float3Const(dim - 1) << " + " - << ss.float3Const(0.5f) + ") / " - << ss.float3Const(dim) << ";"; - - ss.newLine() << shaderCreator->getPixelName() << ".rgb = " - << ss.sampleTex3D(name, name + "_coords") << ".rgb;"; + if (use_high_precision) + { + // Use GL_NEAREST and do interpolation by hand to avoid the precision + // issues of native trilinear interpolation on many popular GPUs. + + ss.newLine() << ss.float3Decl("coords") << " = " + << shaderCreator->getPixelName() << ".rgb * " + << ss.float3Const(dim - 1) << "; "; + + // baseInd is on [0,dim-1] + ss.newLine() << ss.float3Decl("baseInd") << " = floor(coords);"; + + // frac is on [0,1] + ss.newLine() << ss.float3Decl("frac") << " = coords - baseInd;"; + + // Scale/offset baseInd onto [0,1] as usual for doing texture lookups. + // We use zyx to flip the order since blue varies most rapidly + // in the grid array ordering. + ss.newLine() << "baseInd = ( baseInd.zyx + " << ss.float3Const(0.5f) << " ) / " << ss.float3Const(dim) << ";"; + + // Fetch the 8 corners of the 3D texture cell. + ss.newLine() << ss.float3Decl("nextInd") << " = baseInd;"; + ss.newLine() << ss.float3Decl("v1") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, 0.0f, 0.0f) << ";"; + ss.newLine() << ss.float3Decl("v2") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(0.0f, incr, 0.0f) << ";"; + ss.newLine() << ss.float3Decl("v3") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, incr, 0.0f) << ";"; + ss.newLine() << ss.float3Decl("v4") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(0.0f, 0.0f, incr) << ";"; + ss.newLine() << ss.float3Decl("v5") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, 0.0f, incr) << ";"; + ss.newLine() << ss.float3Decl("v6") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(0.0f, incr, incr) << ";"; + ss.newLine() << ss.float3Decl("v7") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + ss.newLine() << "nextInd = baseInd + " << ss.float3Const(incr, incr, incr) << ";"; + ss.newLine() << ss.float3Decl("v8") << " = " << ss.sampleTex3D(name, "nextInd") << ".rgb;"; + + // Lerp on Z. + ss.newLine() << ss.float3Decl("v1_2") << " = " << ss.lerp("v1", "v2", "frac.z") << ";"; + ss.newLine() << ss.float3Decl("v3_4") << " = " << ss.lerp("v3", "v4", "frac.z") << ";"; + ss.newLine() << ss.float3Decl("v5_6") << " = " << ss.lerp("v5", "v6", "frac.z") << ";"; + ss.newLine() << ss.float3Decl("v7_8") << " = " << ss.lerp("v7", "v8", "frac.z") << ";"; + + // Lerp on Y. + ss.newLine() << ss.float3Decl("v1_2_3_4") << " = " << ss.lerp("v1_2", "v3_4", "frac.y") << ";"; + ss.newLine() << ss.float3Decl("v5_6_7_8") << " = " << ss.lerp("v5_6", "v7_8", "frac.y") << ";"; + + // Lerp on X. + ss.newLine() << shaderCreator->getPixelName() << ".rgb = " << ss.lerp("v1_2_3_4", "v5_6_7_8", "frac.x") << ";"; + } + else + { + // Use texture3d and GL_LINEAR and the GPU's built-in trilinear algorithm. + // Note that the fractional components are quantized to 8-bits on some + // hardware, which introduces significant error with small grid sizes. + + ss.newLine() << ss.float3Decl(name + "_coords") + << " = (" << shaderCreator->getPixelName() << ".zyx * " + << ss.float3Const(dim - 1) << " + " + << ss.float3Const(0.5f) + ") / " + << ss.float3Const(dim) << ";"; + + ss.newLine() << shaderCreator->getPixelName() << ".rgb = " + << ss.sampleTex3D(name, name + "_coords") << ".rgb;"; + } } shaderCreator->addToFunctionShaderCode(ss.string().c_str()); diff --git a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h index d34996f65f..68fc67aa55 100644 --- a/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h +++ b/src/OpenColorIO/ops/lut3d/Lut3DOpGPU.h @@ -12,7 +12,7 @@ namespace OCIO_NAMESPACE { -void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData); +void GetLut3DGPUShaderProgram(GpuShaderCreatorRcPtr & shaderCreator, ConstLut3DOpDataRcPtr & lutData, OptimizationFlags oFlags); } // namespace OCIO_NAMESPACE diff --git a/src/OpenColorIO/ops/matrix/MatrixOp.cpp b/src/OpenColorIO/ops/matrix/MatrixOp.cpp index 93505a284e..f4fc9b37c4 100644 --- a/src/OpenColorIO/ops/matrix/MatrixOp.cpp +++ b/src/OpenColorIO/ops/matrix/MatrixOp.cpp @@ -62,7 +62,7 @@ class MatrixOffsetOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstMatrixOpDataRcPtr matrixData() const { return DynamicPtrCast(data()); } @@ -187,7 +187,7 @@ ConstOpCPURcPtr MatrixOffsetOp::getCPUOp(bool /*fastLogExpPow*/) const return GetMatrixRenderer(data); } -void MatrixOffsetOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void MatrixOffsetOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstMatrixOpDataRcPtr data = matrixData(); if (data->getDirection() == TRANSFORM_DIR_INVERSE) diff --git a/src/OpenColorIO/ops/noop/NoOps.cpp b/src/OpenColorIO/ops/noop/NoOps.cpp index cf71b1fde5..326969cb52 100644 --- a/src/OpenColorIO/ops/noop/NoOps.cpp +++ b/src/OpenColorIO/ops/noop/NoOps.cpp @@ -51,7 +51,7 @@ class AllocationNoOp : public Op void apply(const void * inImg, void * outImg, long numPixels) const override { memcpy(outImg, inImg, numPixels * 4 * sizeof(float)); } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/) const override {} + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/, OptimizationFlags /*oFlags*/) const override {} void getGpuAllocation(AllocationData & allocation) const; @@ -322,7 +322,7 @@ class FileNoOp : public Op void apply(const void * inImg, void * outImg, long numPixels) const override { memcpy(outImg, inImg, numPixels * 4 * sizeof(float)); } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/) const override {} + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/, OptimizationFlags /*oFlags*/) const override {} private: std::string m_fileReference; @@ -408,7 +408,7 @@ class LookNoOp : public Op void apply(const void * inImg, void * outImg, long numPixels) const override { memcpy(outImg, inImg, numPixels * 4 * sizeof(float)); } - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/) const override {} + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & /*shaderCreator*/, OptimizationFlags /*oFlags*/) const override {} private: std::string m_look; diff --git a/src/OpenColorIO/ops/range/RangeOp.cpp b/src/OpenColorIO/ops/range/RangeOp.cpp index f708908fbd..ac3321ec03 100644 --- a/src/OpenColorIO/ops/range/RangeOp.cpp +++ b/src/OpenColorIO/ops/range/RangeOp.cpp @@ -51,7 +51,7 @@ class RangeOp : public Op ConstOpCPURcPtr getCPUOp(bool fastLogExpPow) const override; - void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const override; + void extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const override; protected: ConstRangeOpDataRcPtr rangeData() const { return DynamicPtrCast(data()); } @@ -199,7 +199,7 @@ ConstOpCPURcPtr RangeOp::getCPUOp(bool /*fastLogExpPow*/) const return GetRangeRenderer(data); } -void RangeOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator) const +void RangeOp::extractGpuShaderInfo(GpuShaderCreatorRcPtr & shaderCreator, OptimizationFlags /*oFlags*/) const { ConstRangeOpDataRcPtr data = rangeData(); if (data->getDirection() == TRANSFORM_DIR_INVERSE)