diff --git a/desmume/src/FIFO.cpp b/desmume/src/FIFO.cpp index ae812cefc..e0489c909 100755 --- a/desmume/src/FIFO.cpp +++ b/desmume/src/FIFO.cpp @@ -1,7 +1,7 @@ /* Copyright 2006 yopyop Copyright 2007 shash - Copyright 2007-2023 DeSmuME team + Copyright 2007-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -489,11 +489,11 @@ void DISP_FIFOrecv_LineOpaque(u32 *__restrict dst) } else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else if (OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)(disp_fifo.buf + disp_fifo.head), dst, GPU_FRAMEBUFFER_NATIVE_WIDTH); } _DISP_FIFOrecv_LineAdvance(); diff --git a/desmume/src/GPU.cpp b/desmume/src/GPU.cpp index c3a24bd07..6a720e019 100644 --- a/desmume/src/GPU.cpp +++ b/desmume/src/GPU.cpp @@ -2,7 +2,7 @@ Copyright (C) 2006 yopyop Copyright (C) 2006-2007 Theo Berkau Copyright (C) 2007 shash - Copyright (C) 2008-2023 DeSmuME team + Copyright (C) 2008-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1032,12 +1032,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI { if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) ) { - ColorspaceConvertBuffer555To6665Opaque((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque((u16 
*)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else { u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); - ColorspaceConvertBuffer555To6665Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom); } break; @@ -1047,12 +1047,12 @@ void GPUEngineBase::_TransitionLineNativeToCustom(GPUEngineCompositorInfo &compI { if ( (compInfo.line.widthCustom == GPU_FRAMEBUFFER_NATIVE_WIDTH) && (compInfo.line.renderCount == 1) ) { - ColorspaceConvertBuffer555To8888Opaque((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)compInfo.target.lineColorHeadNative, (u32 *)compInfo.target.lineColorHeadCustom, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else { u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); - ColorspaceConvertBuffer555To8888Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)compInfo.target.lineColorHeadNative, workingNativeBuffer32 + compInfo.line.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(compInfo.line, workingNativeBuffer32 + compInfo.line.blockOffsetNative, compInfo.target.lineColorHeadCustom); } break; @@ -3729,7 +3729,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA { if (OUTPUTFORMAT == 
NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); } this->_RenderLine_DispCapture_Copy(lineInfo, srcBPtr, dstCustomPtr, captureLengthExt); @@ -3755,7 +3755,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA else { u32 *workingNativeBuffer32 = this->_targetDisplay->GetWorkingNativeBuffer32(); - ColorspaceConvertBuffer555To8888Opaque((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)srcAPtr, workingNativeBuffer32 + lineInfo.blockOffsetNative, GPU_FRAMEBUFFER_NATIVE_WIDTH); CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, workingNativeBuffer32 + lineInfo.blockOffsetNative, this->_captureWorkingA32); srcAPtr = this->_captureWorkingA32; } @@ -3772,7 +3772,7 @@ void GPUEngineA::_RenderLine_DisplayCaptureCustom(const IOREG_DISPCAPCNT &DISPCA { if ((OUTPUTFORMAT == NDSColorFormat_BGR888_Rev) && (DISPCAPCNT.SrcB != 0)) { - ColorspaceConvertBuffer555To8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(this->_fifoLine16, (u32 *)srcBPtr, GPU_FRAMEBUFFER_NATIVE_WIDTH); } CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, srcBPtr, this->_captureWorkingB32); @@ -3919,7 +3919,7 @@ void GPUEngineA::_RenderLine_DisplayCapture(const GPUEngineCompositorInfo &compI { if (willReadNativeVRAM) { - ColorspaceConvertBuffer555To8888Opaque(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(vramNative16, (u32 *)vramCustom32, GPU_FRAMEBUFFER_NATIVE_WIDTH); } } @@ -4435,7 +4435,7 @@ void GPUEngineA::_HandleDisplayModeVRAM(const GPUEngineLineInfo &lineInfo) { const u16 *src = (u16 *)this->_VRAMCustomBlockPtr[DISPCNT.VRAM_Block] + 
lineInfo.blockOffsetCustom; u32 *dst = (u32 *)customBuffer + lineInfo.blockOffsetCustom; - ColorspaceConvertBuffer555To6665Opaque(src, dst, lineInfo.pixelCount); + ColorspaceConvertBuffer555xTo6665Opaque(src, dst, lineInfo.pixelCount); break; } @@ -5780,11 +5780,11 @@ void GPUSubsystem::_ConvertAndUpscaleForLoadstate(const NDSDisplayID displayID, switch (this->_display[displayID]->GetColorFormat()) { case NDSColorFormat_BGR666_Rev: - ColorspaceConvertBuffer555To6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; case NDSColorFormat_BGR888_Rev: - ColorspaceConvertBuffer555To8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; default: @@ -6212,11 +6212,11 @@ void NDSDisplay::ResolveLinesDisplayedNative() { if (this->_customColorFormat == NDSColorFormat_BGR888_Rev) { - ColorspaceConvertBuffer555To8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); } else { - ColorspaceConvertBuffer555To6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); + ColorspaceConvertBuffer555xTo6665Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH); } CopyLineExpandHinted<0x3FFF, true, false, false, 4>(lineInfo, working, dst); @@ -6256,7 +6256,7 @@ void NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo) { case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR888_Rev: - ColorspaceConvertBuffer555To8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(src, working, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; default: @@ -6298,7 +6298,7 @@ void 
NDSDisplay::ResolveFramebufferToCustom(NDSDisplayInfo &mutableInfo) case NDSColorFormat_BGR666_Rev: case NDSColorFormat_BGR888_Rev: - ColorspaceConvertBuffer555To8888Opaque(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(this->_nativeBuffer16, (u32 *)this->_customBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); break; } } diff --git a/desmume/src/GPU_Operations_AVX2.cpp b/desmume/src/GPU_Operations_AVX2.cpp index 78e5e9823..d064a7370 100644 --- a/desmume/src/GPU_Operations_AVX2.cpp +++ b/desmume/src/GPU_Operations_AVX2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021-2023 DeSmuME team + Copyright (C) 2021-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -1128,13 +1128,13 @@ FORCEINLINE void PixelOperation_AVX2::_copy16(GPUEngineCompositorInfo &compInfo, if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo6665Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(src1, src32[2], src32[3]); } _mm256_store_si256( (v256u32 *)compInfo.target.lineColor32 + 0, src32[0] ); @@ -1205,13 +1205,13 @@ FORCEINLINE void PixelOperation_AVX2::_copyMask16(GPUEngineCompositorInfo &compI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_AVX2(src1, src32[2], src32[3]); + 
ColorspaceConvert555xTo6665Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(src1, src32[2], src32[3]); } passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); @@ -1304,13 +1304,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUp16(GPUEngineCompositorInfo &c if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_AVX2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo888x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_AVX2(src1, dst[2], dst[3]); } const v256u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
_mm256_set1_epi32(0x1F000000) : _mm256_set1_epi32(0xFF000000); @@ -1377,13 +1377,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessUpMask16(GPUEngineCompositorInf if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_AVX2(src1, src32[2], src32[3]); } passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); @@ -1471,13 +1471,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDown16(GPUEngineCompositorInfo if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_AVX2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_AVX2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo888x_AVX2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_AVX2(src1, dst[2], dst[3]); } const v256u32 alphaBits = _mm256_set1_epi32((OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
0x1F000000 : 0xFF000000); @@ -1544,13 +1544,13 @@ FORCEINLINE void PixelOperation_AVX2::_brightnessDownMask16(GPUEngineCompositorI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_AVX2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_AVX2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_AVX2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_AVX2(src1, src32[2], src32[3]); } passMask16[0] = _mm256_permute4x64_epi64(passMask16[0], 0xD8); @@ -1674,13 +1674,13 @@ FORCEINLINE void PixelOperation_AVX2::_unknownEffectMask16(GPUEngineCompositorIn } else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_AVX2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo666X_AVX2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo666x_AVX2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo666x_AVX2(src1, tmpSrc[2], tmpSrc[3]); } else { - ColorspaceConvert555XTo888X_AVX2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo888X_AVX2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo888x_AVX2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo888x_AVX2(src1, tmpSrc[2], tmpSrc[3]); } switch (compInfo.renderState.colorEffect) diff --git a/desmume/src/GPU_Operations_SSE2.cpp b/desmume/src/GPU_Operations_SSE2.cpp index 3aca64de4..5621fb196 100644 --- a/desmume/src/GPU_Operations_SSE2.cpp +++ b/desmume/src/GPU_Operations_SSE2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2021-2023 DeSmuME team + Copyright (C) 2021-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -922,13 +922,13 @@ FORCEINLINE void 
PixelOperation_SSE2::_copy16(GPUEngineCompositorInfo &compInfo, if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(src1, src32[2], src32[3]); } _mm_store_si128( (v128u32 *)compInfo.target.lineColor32 + 0, src32[0] ); @@ -999,13 +999,13 @@ FORCEINLINE void PixelOperation_SSE2::_copyMask16(GPUEngineCompositorInfo &compI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555To6665Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To6665Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555To8888Opaque_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(src1, src32[2], src32[3]); } const v128u32 dst32[4] = { @@ -1104,13 +1104,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUp16(GPUEngineCompositorInfo &c if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_SSE2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_SSE2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_SSE2(src1, dst[2], dst[3]); + 
ColorspaceConvert555xTo888x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_SSE2(src1, dst[2], dst[3]); } const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? _mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000); @@ -1182,13 +1182,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessUpMask16(GPUEngineCompositorInf if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_SSE2(src1, src32[2], src32[3]); } const v128u32 dst32[4] = { @@ -1275,13 +1275,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDown16(GPUEngineCompositorInfo if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo666X_SSE2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo666x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo666x_SSE2(src1, dst[2], dst[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, dst[0], dst[1]); - ColorspaceConvert555XTo888X_SSE2(src1, dst[2], dst[3]); + ColorspaceConvert555xTo888x_SSE2(src0, dst[0], dst[1]); + ColorspaceConvert555xTo888x_SSE2(src1, dst[2], dst[3]); } const v128u32 alphaBits = (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) ? 
_mm_set1_epi32(0x1F000000) : _mm_set1_epi32(0xFF000000); @@ -1353,13 +1353,13 @@ FORCEINLINE void PixelOperation_SSE2::_brightnessDownMask16(GPUEngineCompositorI if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo666X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo666x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo666x_SSE2(src1, src32[2], src32[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, src32[0], src32[1]); - ColorspaceConvert555XTo888X_SSE2(src1, src32[2], src32[3]); + ColorspaceConvert555xTo888x_SSE2(src0, src32[0], src32[1]); + ColorspaceConvert555xTo888x_SSE2(src1, src32[2], src32[3]); } const v128u32 dst32[4] = { @@ -1494,13 +1494,13 @@ FORCEINLINE void PixelOperation_SSE2::_unknownEffectMask16(GPUEngineCompositorIn } else if (OUTPUTFORMAT == NDSColorFormat_BGR666_Rev) { - ColorspaceConvert555XTo666X_SSE2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo666X_SSE2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo666x_SSE2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo666x_SSE2(src1, tmpSrc[2], tmpSrc[3]); } else { - ColorspaceConvert555XTo888X_SSE2(src0, tmpSrc[0], tmpSrc[1]); - ColorspaceConvert555XTo888X_SSE2(src1, tmpSrc[2], tmpSrc[3]); + ColorspaceConvert555xTo888x_SSE2(src0, tmpSrc[0], tmpSrc[1]); + ColorspaceConvert555xTo888x_SSE2(src1, tmpSrc[2], tmpSrc[3]); } switch (compInfo.renderState.colorEffect) diff --git a/desmume/src/OGLRender.cpp b/desmume/src/OGLRender.cpp index f2b9093f9..0c68d4d0a 100644 --- a/desmume/src/OGLRender.cpp +++ b/desmume/src/OGLRender.cpp @@ -51,10 +51,10 @@ #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_SHORT_1_5_5_5_REV #endif #elif defined(OPENGL_VARIANT_ES) - #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_SHORT_5_5_5_1 + #define OGL_TEXTURE_SRC_CI_COLOR GL_UNSIGNED_BYTE #define OGL_TEXTURE_SRC_CI_FOG GL_UNSIGNED_BYTE #define OGL_TEXTURE_SRC_EDGE_COLOR GL_UNSIGNED_BYTE - #define 
OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_SHORT_5_5_5_1 + #define OGL_TEXTURE_SRC_TOON_TABLE GL_UNSIGNED_BYTE #else #error Unknown OpenGL variant. #endif @@ -2968,7 +2968,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, tempClearImageBuffer); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, OGL_TEXTURE_SRC_CI_COLOR, tempClearImageBuffer); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIDepthStencilID); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -2983,7 +2983,7 @@ Render3DError OpenGLRenderer_1_2::CreateFBOs() glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, GL_UNSIGNED_BYTE, tempClearImageBuffer); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, 0, GL_RGBA, OGL_TEXTURE_SRC_CI_FOG, tempClearImageBuffer); glBindTexture(GL_TEXTURE_2D, 0); @@ -3207,7 +3207,7 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms() glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 32, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 32, 0, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, NULL); glGenTextures(1, 
&OGLRef.texEdgeColorTableID); glBindTexture(GL_TEXTURE_1D, OGLRef.texEdgeColorTableID); @@ -3215,7 +3215,7 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms() glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 8, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA, 8, 0, GL_RGBA, OGL_TEXTURE_SRC_EDGE_COLOR, NULL); glGenTextures(1, &OGLRef.texFogDensityTableID); glBindTexture(GL_TEXTURE_1D, OGLRef.texFogDensityTableID); @@ -3223,7 +3223,7 @@ Render3DError OpenGLRenderer_1_2::CreateGeometryPrograms() glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); - glTexImage1D(GL_TEXTURE_1D, 0, GL_RED, 32, 0, GL_RED, GL_UNSIGNED_BYTE, NULL); + glTexImage1D(GL_TEXTURE_1D, 0, GL_LUMINANCE, 32, 0, GL_RED, GL_UNSIGNED_BYTE, NULL); glActiveTexture(GL_TEXTURE0); OGLGeometryFlags programFlags; @@ -3929,7 +3929,7 @@ Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBu } } - const bool didColorChange = (memcmp(OGLRef.workingCIColorBuffer, colorBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)) != 0); + const bool didColorChange = (memcmp(OGLRef.workingCIColorBuffer16, colorBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)) != 0); const bool didDepthStencilChange = (memcmp(OGLRef.workingCIDepthStencilBuffer[this->_clearImageIndex], OGLRef.workingCIDepthStencilBuffer[this->_clearImageIndex ^ 0x01], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(GLuint)) != 0); const bool didFogAttributesChange = this->_enableFog && this->_deviceInfo.isFogSupported && 
(memcmp(OGLRef.workingCIFogAttributesBuffer[this->_clearImageIndex], OGLRef.workingCIFogAttributesBuffer[this->_clearImageIndex ^ 0x01], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(GLuint)) != 0); @@ -3937,9 +3937,18 @@ Render3DError OpenGLRenderer_1_2::UploadClearImage(const u16 *__restrict colorBu if (didColorChange) { - memcpy(OGLRef.workingCIColorBuffer, colorBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)); + memcpy(OGLRef.workingCIColorBuffer16, colorBuffer, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * sizeof(u16)); glBindTexture(GL_TEXTURE_2D, OGLRef.texCIColorID); - glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, OGL_TEXTURE_SRC_CI_COLOR, OGLRef.workingCIColorBuffer); + + if (OGL_TEXTURE_SRC_CI_COLOR == GL_UNSIGNED_BYTE) + { + ColorspaceConvertBuffer5551To8888(OGLRef.workingCIColorBuffer16, OGLRef.workingCIColorBuffer32, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, OGL_TEXTURE_SRC_CI_COLOR, OGLRef.workingCIColorBuffer32); + } + else + { + glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, GPU_FRAMEBUFFER_NATIVE_WIDTH, GPU_FRAMEBUFFER_NATIVE_HEIGHT, GL_RGBA, OGL_TEXTURE_SRC_CI_COLOR, OGLRef.workingCIColorBuffer16); + } } if (didDepthStencilChange) @@ -4535,7 +4544,16 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co { glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); glBindTexture(GL_TEXTURE_1D, OGLRef.texToonTableID); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, renderState.toonTable16); + + if (OGL_TEXTURE_SRC_TOON_TABLE == GL_UNSIGNED_BYTE) + { + ColorspaceConvertBuffer5551To8888(renderState.toonTable16, OGLRef.toonTable32, 32); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, 
OGLRef.toonTable32); + } + else + { + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, renderState.toonTable16); + } } } #if !defined(GL_ES_VERSION_3_0) @@ -4560,6 +4578,18 @@ Render3DError OpenGLRenderer_1_2::BeginRender(const GFX3D_State &renderState, co } #endif } + else + { + if (this->isShaderSupported && this->isFBOSupported) + { + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } + } glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -5651,120 +5681,143 @@ Render3DError OpenGLRenderer_2_0::BeginRender(const GFX3D_State &renderState, co this->_enableAlphaBlending = (renderState.DISP3DCNT.EnableAlphaBlending) ? true : false; - glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID); - - // Only copy as much vertex data as we need to, since this can be a potentially large upload size. - glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(NDSVertex) * renderGList.rawVertCount, renderGList.rawVtxList); - - // Generate the clipped polygon list. 
- bool renderNeedsToonTable = false; - - for (size_t i = 0, vertIndexCount = 0; i < this->_clippedPolyCount; i++) + if (this->_clippedPolyCount > 0) { - const CPoly &cPoly = this->_clippedPolyList[i]; - const POLY &rawPoly = this->_rawPolyList[cPoly.index]; - const size_t polyType = rawPoly.type; + glBindBuffer(GL_ARRAY_BUFFER, OGLRef.vboGeometryVtxID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, OGLRef.iboGeometryIndexID); - for (size_t j = 0; j < polyType; j++) + // Only copy as much vertex data as we need to, since this can be a potentially large upload size. + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(NDSVertex) * renderGList.rawVertCount, renderGList.rawVtxList); + + // Generate the clipped polygon list. + bool renderNeedsToonTable = false; + + for (size_t i = 0, vertIndexCount = 0; i < this->_clippedPolyCount; i++) { - const GLushort vertIndex = rawPoly.vertIndexes[j]; + const CPoly &cPoly = this->_clippedPolyList[i]; + const POLY &rawPoly = this->_rawPolyList[cPoly.index]; + const size_t polyType = rawPoly.type; - // While we're looping through our vertices, add each vertex index to - // a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional - // vertices here to convert them to GL_TRIANGLES, which are much easier - // to work with and won't be deprecated in future OpenGL versions. - OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex; - if (!GFX3D_IsPolyWireframe(rawPoly) && (rawPoly.vtxFormat == GFX3D_QUADS || rawPoly.vtxFormat == GFX3D_QUAD_STRIP)) + for (size_t j = 0; j < polyType; j++) { - if (j == 2) - { - OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex; - } - else if (j == 3) + const GLushort vertIndex = rawPoly.vertIndexes[j]; + + // While we're looping through our vertices, add each vertex index to + // a buffer. For GFX3D_QUADS and GFX3D_QUAD_STRIP, we also add additional + // vertices here to convert them to GL_TRIANGLES, which are much easier + // to work with and won't be deprecated in future OpenGL versions. 
+ OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex; + if (!GFX3D_IsPolyWireframe(rawPoly) && (rawPoly.vtxFormat == GFX3D_QUADS || rawPoly.vtxFormat == GFX3D_QUAD_STRIP)) { - OGLRef.vertIndexBuffer[vertIndexCount++] = rawPoly.vertIndexes[0]; + if (j == 2) + { + OGLRef.vertIndexBuffer[vertIndexCount++] = vertIndex; + } + else if (j == 3) + { + OGLRef.vertIndexBuffer[vertIndexCount++] = rawPoly.vertIndexes[0]; + } } } + + renderNeedsToonTable = renderNeedsToonTable || (rawPoly.attribute.Mode == POLYGON_MODE_TOONHIGHLIGHT); + + // Get the texture that is to be attached to this polygon. + this->_textureList[i] = this->GetLoadedTextureFromPolygon(rawPoly, this->_enableTextureSampling); } - renderNeedsToonTable = renderNeedsToonTable || (rawPoly.attribute.Mode == POLYGON_MODE_TOONHIGHLIGHT); + // Replace the entire index buffer as a hint to the driver that we can orphan the index buffer and + // avoid a synchronization cost. + glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(OGLRef.vertIndexBuffer), OGLRef.vertIndexBuffer); - // Get the texture that is to be attached to this polygon. - this->_textureList[i] = this->GetLoadedTextureFromPolygon(rawPoly, this->_enableTextureSampling); - } - - // Replace the entire index buffer as a hint to the driver that we can orphan the index buffer and - // avoid a synchronization cost. - glBufferSubData(GL_ELEMENT_ARRAY_BUFFER, 0, sizeof(OGLRef.vertIndexBuffer), OGLRef.vertIndexBuffer); - - // Set up rendering states that will remain constant for the entire frame. - this->_pendingRenderStates.enableAntialiasing = (renderState.DISP3DCNT.EnableAntialiasing) ? GL_TRUE : GL_FALSE; - this->_pendingRenderStates.enableFogAlphaOnly = (renderState.DISP3DCNT.FogOnlyAlpha) ? 
GL_TRUE : GL_FALSE; - this->_pendingRenderStates.clearPolyID = this->_clearAttributes.opaquePolyID; - this->_pendingRenderStates.clearDepth = (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF; - this->_pendingRenderStates.alphaTestRef = divide5bitBy31_LUT[renderState.alphaTestRef]; - - if (this->_enableFog && this->_deviceInfo.isFogSupported) - { - this->_fogProgramKey.key = 0; - this->_fogProgramKey.offset = renderState.fogOffset & 0x7FFF; - this->_fogProgramKey.shift = renderState.fogShift; + // Set up rendering states that will remain constant for the entire frame. + this->_pendingRenderStates.enableAntialiasing = (renderState.DISP3DCNT.EnableAntialiasing) ? GL_TRUE : GL_FALSE; + this->_pendingRenderStates.enableFogAlphaOnly = (renderState.DISP3DCNT.FogOnlyAlpha) ? GL_TRUE : GL_FALSE; + this->_pendingRenderStates.clearPolyID = this->_clearAttributes.opaquePolyID; + this->_pendingRenderStates.clearDepth = (GLfloat)this->_clearAttributes.depth / (GLfloat)0x00FFFFFF; + this->_pendingRenderStates.alphaTestRef = divide5bitBy31_LUT[renderState.alphaTestRef]; - this->_pendingRenderStates.fogColor.r = divide5bitBy31_LUT[(renderState.fogColor ) & 0x0000001F]; - this->_pendingRenderStates.fogColor.g = divide5bitBy31_LUT[(renderState.fogColor >> 5) & 0x0000001F]; - this->_pendingRenderStates.fogColor.b = divide5bitBy31_LUT[(renderState.fogColor >> 10) & 0x0000001F]; - this->_pendingRenderStates.fogColor.a = divide5bitBy31_LUT[(renderState.fogColor >> 16) & 0x0000001F]; - this->_pendingRenderStates.fogOffset = (GLfloat)(renderState.fogOffset & 0x7FFF) / 32767.0f; - this->_pendingRenderStates.fogStep = (GLfloat)(0x0400 >> renderState.fogShift) / 32767.0f; + if (this->_enableFog && this->_deviceInfo.isFogSupported) + { + this->_fogProgramKey.key = 0; + this->_fogProgramKey.offset = renderState.fogOffset & 0x7FFF; + this->_fogProgramKey.shift = renderState.fogShift; + + this->_pendingRenderStates.fogColor.r = divide5bitBy31_LUT[(renderState.fogColor ) & 0x0000001F]; 
+ this->_pendingRenderStates.fogColor.g = divide5bitBy31_LUT[(renderState.fogColor >> 5) & 0x0000001F]; + this->_pendingRenderStates.fogColor.b = divide5bitBy31_LUT[(renderState.fogColor >> 10) & 0x0000001F]; + this->_pendingRenderStates.fogColor.a = divide5bitBy31_LUT[(renderState.fogColor >> 16) & 0x0000001F]; + this->_pendingRenderStates.fogOffset = (GLfloat)(renderState.fogOffset & 0x7FFF) / 32767.0f; + this->_pendingRenderStates.fogStep = (GLfloat)(0x0400 >> renderState.fogShift) / 32767.0f; + + u8 fogDensityTable[32]; + for (size_t i = 0; i < 32; i++) + { + fogDensityTable[i] = (renderState.fogDensityTable[i] == 127) ? 255 : renderState.fogDensityTable[i] << 1; + } + + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); + glBindTexture(GL_TEXTURE_1D, OGLRef.texFogDensityTableID); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RED, GL_UNSIGNED_BYTE, fogDensityTable); + } - u8 fogDensityTable[32]; - for (size_t i = 0; i < 32; i++) + if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) { - fogDensityTable[i] = (renderState.fogDensityTable[i] == 127) ? 255 : renderState.fogDensityTable[i] << 1; + const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 0x80 : 0xFF; + Color4u8 edgeColor32[8]; + + for (size_t i = 0; i < 8; i++) + { + edgeColor32[i].value = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); + } + + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); + glBindTexture(GL_TEXTURE_1D, OGLRef.texEdgeColorTableID); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 8, GL_RGBA, OGL_TEXTURE_SRC_EDGE_COLOR, edgeColor32); } - glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); - glBindTexture(GL_TEXTURE_1D, OGLRef.texFogDensityTableID); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RED, GL_UNSIGNED_BYTE, fogDensityTable); - } - - if (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) - { - const u8 alpha8 = (renderState.DISP3DCNT.EnableAntialiasing) ? 
0x80 : 0xFF; - Color4u8 edgeColor32[8]; + // Setup render states + this->_geometryProgramFlags.value = 0; + this->_geometryProgramFlags.EnableWDepth = renderState.SWAP_BUFFERS.DepthMode; + this->_geometryProgramFlags.EnableAlphaTest = renderState.DISP3DCNT.EnableAlphaTest; + this->_geometryProgramFlags.EnableTextureSampling = (this->_enableTextureSampling) ? 1 : 0; + this->_geometryProgramFlags.ToonShadingMode = renderState.DISP3DCNT.PolygonShading; + this->_geometryProgramFlags.EnableFog = (this->_enableFog && this->_deviceInfo.isFogSupported) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = (this->_isDepthLEqualPolygonFacingSupported) ? 1 : 0; - for (size_t i = 0; i < 8; i++) + this->_SetupGeometryShaders(this->_geometryProgramFlags); + + if (renderNeedsToonTable) { - edgeColor32[i].value = COLOR555TO8888(renderState.edgeMarkColorTable[i] & 0x7FFF, alpha8); + glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); + glBindTexture(GL_TEXTURE_1D, OGLRef.texToonTableID); + + if (OGL_TEXTURE_SRC_TOON_TABLE == GL_UNSIGNED_BYTE) + { + ColorspaceConvertBuffer5551To8888(renderState.toonTable16, OGLRef.toonTable32, 32); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, OGLRef.toonTable32); + } + else + { + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, renderState.toonTable16); + } } - - glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); - glBindTexture(GL_TEXTURE_1D, OGLRef.texEdgeColorTableID); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 8, GL_RGBA, OGL_TEXTURE_SRC_EDGE_COLOR, edgeColor32); } - - // Setup render states - this->_geometryProgramFlags.value = 0; - this->_geometryProgramFlags.EnableWDepth = renderState.SWAP_BUFFERS.DepthMode; - this->_geometryProgramFlags.EnableAlphaTest = renderState.DISP3DCNT.EnableAlphaTest; - this->_geometryProgramFlags.EnableTextureSampling 
= (this->_enableTextureSampling) ? 1 : 0; - this->_geometryProgramFlags.ToonShadingMode = renderState.DISP3DCNT.PolygonShading; - this->_geometryProgramFlags.EnableFog = (this->_enableFog && this->_deviceInfo.isFogSupported) ? 1 : 0; - this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark && this->_deviceInfo.isEdgeMarkSupported) ? 1 : 0; - this->_geometryProgramFlags.OpaqueDrawMode = (this->_isDepthLEqualPolygonFacingSupported) ? 1 : 0; - - this->_SetupGeometryShaders(this->_geometryProgramFlags); - - if (renderNeedsToonTable) + else { - glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_LookupTable); - glBindTexture(GL_TEXTURE_1D, OGLRef.texToonTableID); - glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 32, GL_RGBA, OGL_TEXTURE_SRC_TOON_TABLE, renderState.toonTable16); + if (this->isFBOSupported) + { + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } } glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); - glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); glDepthMask(GL_TRUE); diff --git a/desmume/src/OGLRender.h b/desmume/src/OGLRender.h index 83014b204..2153e3960 100644 --- a/desmume/src/OGLRender.h +++ b/desmume/src/OGLRender.h @@ -34,7 +34,6 @@ #include #define __gles2_gl2_h_ // Guard against including the gl2.h file. #include // "gl3ext.h" is just a stub file. The real extension header is "gl2ext.h". 
- #include // Ignore dynamic linking #define OGLEXT(procPtr, func) @@ -689,7 +688,9 @@ struct OGLRenderRef GLfloat *texCoord2fBuffer; GLfloat *color4fBuffer; CACHE_ALIGN GLushort vertIndexBuffer[OGLRENDER_VERT_INDEX_BUFFER_COUNT]; - CACHE_ALIGN GLushort workingCIColorBuffer[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; + CACHE_ALIGN GLuint toonTable32[32]; + CACHE_ALIGN GLushort workingCIColorBuffer16[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; + CACHE_ALIGN GLuint workingCIColorBuffer32[GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIDepthStencilBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; CACHE_ALIGN GLuint workingCIFogAttributesBuffer[2][GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT]; }; diff --git a/desmume/src/OGLRender_3_2.cpp b/desmume/src/OGLRender_3_2.cpp index c92d8019d..6ab93abb8 100644 --- a/desmume/src/OGLRender_3_2.cpp +++ b/desmume/src/OGLRender_3_2.cpp @@ -2477,6 +2477,15 @@ Render3DError OpenGLRenderer_3_2::BeginRender(const GFX3D_State &renderState, co this->_SetupGeometryShaders(this->_geometryProgramFlags); } + else + { + // Even with no polygons to draw, we always need to set these 3 flags so that + // glDrawBuffers() can reference the correct set of FBO attachments using + // OGLGeometryFlags.DrawBuffersMode. + this->_geometryProgramFlags.EnableFog = (this->_enableFog) ? 1 : 0; + this->_geometryProgramFlags.EnableEdgeMark = (this->_enableEdgeMark) ? 
1 : 0; + this->_geometryProgramFlags.OpaqueDrawMode = 1; + } glReadBuffer(OGL_COLOROUT_ATTACHMENT_ID); glColorMask(GL_TRUE, GL_TRUE, GL_TRUE, GL_TRUE); @@ -2858,19 +2867,19 @@ Render3DError OpenGLRenderer_3_2::SetFramebufferSize(size_t w, size_t h) } glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FinalColor); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_DepthStencil); glTexImage2D(GL_TEXTURE_2D, 0, GL_DEPTH24_STENCIL8, (GLsizei)w, (GLsizei)h, 0, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, NULL); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GColor); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_GPolyID); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glActiveTexture(GL_TEXTURE0 + OGLTextureUnitID_FogAttr); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, (GLsizei)w, (GLsizei)h, 0, GL_RGBA, GL_UNSIGNED_BYTE, NULL); glActiveTexture(GL_TEXTURE0); diff --git a/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp b/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp index 5b53b8ef3..18a9a33f8 100644 --- a/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp +++ b/desmume/src/frontend/cocoa/ClientAVCaptureObject.cpp @@ -484,7 +484,7 @@ void ClientAVCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam & const u16 *__restrict src = (const u16 *__restrict)param.src; u8 *__restrict dst = param.dst; - 
ColorspaceConvertBuffer555XTo888(src, dst, param.frameWidth * lineCount); + ColorspaceConvertBuffer555xTo888(src, dst, param.frameWidth * lineCount); } //converts 32bpp to 24bpp and flips @@ -494,7 +494,7 @@ void ClientAVCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam & const u32 *__restrict src = (const u32 *__restrict)param.src; u8 *__restrict dst = param.dst; - ColorspaceConvertBuffer888XTo888(src, dst, param.frameWidth * lineCount); + ColorspaceConvertBuffer888xTo888(src, dst, param.frameWidth * lineCount); } void ClientAVCaptureObject::CaptureVideoFrame(const void *srcVideoFrame, const size_t inFrameWidth, const size_t inFrameHeight, const NDSColorFormat colorFormat) diff --git a/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp b/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp old mode 100755 new mode 100644 index e2734fd79..429f3007e --- a/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp +++ b/desmume/src/frontend/cocoa/OGLDisplayOutput.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2023 DeSmuME team + Copyright (C) 2014-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -4719,7 +4719,7 @@ void OGLClientSharedData::FetchNativeDisplayToSrcClone(const NDSDisplayInfo *dis return; } - ColorspaceConvertBuffer555To8888Opaque(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(displayInfoList[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false; if (needsLock) @@ -4744,7 +4744,7 @@ void OGLClientSharedData::FetchCustomDisplayToSrcClone(const NDSDisplayInfo *dis return; } - ColorspaceConvertBuffer888XTo8888Opaque((u32 
*)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer888xTo8888Opaque((u32 *)displayInfoList[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); this->_srcCloneNeedsUpdate[displayID][bufferIndex] = false; if (needsLock) @@ -5628,14 +5628,14 @@ OGLImage::OGLImage(OGLContextInfo *contextInfo, GLsizei imageWidth, GLsizei imag }; // Set up VBOs - glGenBuffersARB(1, &_vboVertexID); - glGenBuffersARB(1, &_vboTexCoordID); + glGenBuffers(1, &_vboVertexID); + glGenBuffers(1, &_vboTexCoordID); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(vtxBuffer), vtxBuffer, GL_STATIC_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(_texCoordBuffer), _texCoordBuffer, GL_STATIC_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + glBindBuffer(GL_ARRAY_BUFFER, _vboVertexID); + glBufferData(GL_ARRAY_BUFFER, sizeof(vtxBuffer), vtxBuffer, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); + glBufferData(GL_ARRAY_BUFFER, sizeof(_texCoordBuffer), _texCoordBuffer, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); // Set up VAO glGenVertexArraysDESMUME(1, &_vaoMainStatesID); @@ -5643,9 +5643,9 @@ OGLImage::OGLImage(OGLContextInfo *contextInfo, GLsizei imageWidth, GLsizei imag if (contextInfo->IsShaderSupported()) { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboVertexID); glVertexAttribPointer(OGLVertexAttributeID_Position, 2, GL_INT, GL_FALSE, 0, 0); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, 0, 0); 
glEnableVertexAttribArray(OGLVertexAttributeID_Position); @@ -5653,9 +5653,9 @@ OGLImage::OGLImage(OGLContextInfo *contextInfo, GLsizei imageWidth, GLsizei imag } else { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboVertexID); glVertexPointer(2, GL_INT, 0, 0); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); glTexCoordPointer(2, GL_FLOAT, 0, 0); glEnableClientState(GL_VERTEX_ARRAY); @@ -5721,8 +5721,8 @@ OGLImage::~OGLImage() _isVAOPresent = false; } - glDeleteBuffersARB(1, &this->_vboVertexID); - glDeleteBuffersARB(1, &this->_vboTexCoordID); + glDeleteBuffers(1, &this->_vboVertexID); + glDeleteBuffers(1, &this->_vboTexCoordID); glDeleteTextures(1, &this->_texCPUFilterDstID); glDeleteTextures(1, &this->_texVideoInputDataID); @@ -6223,8 +6223,8 @@ void OGLImage::ProcessOGL() // Output this->_texVideoOutputID = this->_texVideoPixelScalerID; - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboTexCoordID); - glBufferSubDataARB(GL_ARRAY_BUFFER_ARB, 0, sizeof(this->_texCoordBuffer), this->_texCoordBuffer); + glBindBuffer(GL_ARRAY_BUFFER, this->_vboTexCoordID); + glBufferSubData(GL_ARRAY_BUFFER, 0, sizeof(this->_texCoordBuffer), this->_texCoordBuffer); } void OGLImage::RenderOGL() @@ -6330,22 +6330,22 @@ OGLHUDLayer::OGLHUDLayer(OGLVideoOutput *oglVO) _workingCharBufferList->reserve(16); // Set up VBOs - glGenBuffersARB(1, &_vboPositionVertexID); - glGenBuffersARB(1, &_vboColorVertexID); - glGenBuffersARB(1, &_vboTexCoordID); - glGenBuffersARB(1, &_vboElementID); - - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboPositionVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboColorVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_COLOR_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); - 
glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); - - glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, _vboElementID); - glBufferDataARB(GL_ELEMENT_ARRAY_BUFFER_ARB, sizeof(GLshort) * HUD_TOTAL_ELEMENTS * 6, NULL, GL_STATIC_DRAW_ARB); - GLshort *idxBufferPtr = (GLshort *)glMapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + glGenBuffers(1, &_vboPositionVertexID); + glGenBuffers(1, &_vboColorVertexID); + glGenBuffers(1, &_vboTexCoordID); + glGenBuffers(1, &_vboElementID); + + glBindBuffer(GL_ARRAY_BUFFER, _vboPositionVertexID); + glBufferData(GL_ARRAY_BUFFER, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, _vboColorVertexID); + glBufferData(GL_ARRAY_BUFFER, HUD_VERTEX_COLOR_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); + glBufferData(GL_ARRAY_BUFFER, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); + + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _vboElementID); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLshort) * HUD_TOTAL_ELEMENTS * 6, NULL, GL_STATIC_DRAW); + GLshort *idxBufferPtr = (GLshort *)glMapBuffer(GL_ELEMENT_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); for (size_t i = 0, j = 0, k = 0; i < HUD_TOTAL_ELEMENTS; i++, j+=6, k+=4) { @@ -6357,8 +6357,8 @@ OGLHUDLayer::OGLHUDLayer(OGLVideoOutput *oglVO) idxBufferPtr[j+5] = k+0; } - glUnmapBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB); - glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, 0); + glUnmapBuffer(GL_ELEMENT_ARRAY_BUFFER); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); // Set up VAO glGenVertexArraysDESMUME(1, &_vaoMainStatesID); @@ -6366,13 +6366,13 @@ OGLHUDLayer::OGLHUDLayer(OGLVideoOutput *oglVO) if (oglVO->GetContextInfo()->IsShaderSupported()) { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboPositionVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboPositionVertexID); 
glVertexAttribPointer(OGLVertexAttributeID_Position, 2, GL_FLOAT, GL_FALSE, 0, NULL); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboColorVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboColorVertexID); glVertexAttribPointer(OGLVertexAttributeID_Color, 4, GL_UNSIGNED_BYTE, GL_TRUE, 0, NULL); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, 0, NULL); - glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, _vboElementID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _vboElementID); glEnableVertexAttribArray(OGLVertexAttributeID_Position); glEnableVertexAttribArray(OGLVertexAttributeID_Color); @@ -6380,13 +6380,13 @@ OGLHUDLayer::OGLHUDLayer(OGLVideoOutput *oglVO) } else { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboPositionVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboPositionVertexID); glVertexPointer(2, GL_FLOAT, 0, NULL); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboColorVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboColorVertexID); glColorPointer(4, GL_UNSIGNED_BYTE, 0, NULL); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); glTexCoordPointer(2, GL_FLOAT, 0, NULL); - glBindBufferARB(GL_ELEMENT_ARRAY_BUFFER_ARB, _vboElementID); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, _vboElementID); glEnableClientState(GL_VERTEX_ARRAY); glEnableClientState(GL_COLOR_ARRAY); @@ -6405,10 +6405,10 @@ OGLHUDLayer::~OGLHUDLayer() } glDeleteVertexArraysDESMUME(1, &this->_vaoMainStatesID); - glDeleteBuffersARB(1, &this->_vboPositionVertexID); - glDeleteBuffersARB(1, &this->_vboColorVertexID); - glDeleteBuffersARB(1, &this->_vboTexCoordID); - glDeleteBuffersARB(1, &this->_vboElementID); + glDeleteBuffers(1, &this->_vboPositionVertexID); + glDeleteBuffers(1, &this->_vboColorVertexID); + glDeleteBuffers(1, &this->_vboTexCoordID); + glDeleteBuffers(1, &this->_vboElementID); glDeleteTextures(1, &this->_texCharMap); @@ -6517,25 
+6517,25 @@ void OGLHUDLayer::_UpdateVerticesOGL() return; } - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboPositionVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); - float *vtxPositionBufferPtr = (float *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + glBindBuffer(GL_ARRAY_BUFFER, this->_vboPositionVertexID); + glBufferData(GL_ARRAY_BUFFER, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + float *vtxPositionBufferPtr = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); this->_output->SetHUDPositionVertices((float)this->_output->GetViewportWidth(), (float)this->_output->GetViewportHeight(), vtxPositionBufferPtr); - glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glUnmapBuffer(GL_ARRAY_BUFFER); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboColorVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_COLOR_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); - uint32_t *vtxColorBufferPtr = (uint32_t *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + glBindBuffer(GL_ARRAY_BUFFER, this->_vboColorVertexID); + glBufferData(GL_ARRAY_BUFFER, HUD_VERTEX_COLOR_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + uint32_t *vtxColorBufferPtr = (uint32_t *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); this->_output->SetHUDColorVertices(vtxColorBufferPtr); - glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glUnmapBuffer(GL_ARRAY_BUFFER); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW_ARB); - float *texCoordBufferPtr = (float *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + glBindBuffer(GL_ARRAY_BUFFER, this->_vboTexCoordID); + glBufferData(GL_ARRAY_BUFFER, HUD_VERTEX_ATTRIBUTE_BUFFER_SIZE, NULL, GL_STREAM_DRAW); + float *texCoordBufferPtr = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); this->_output->SetHUDTextureCoordinates(texCoordBufferPtr); - 
glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glUnmapBuffer(GL_ARRAY_BUFFER); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + glBindBuffer(GL_ARRAY_BUFFER, 0); this->_output->ClearHUDNeedsUpdate(); } @@ -6725,14 +6725,14 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO) pthread_rwlock_init(&_cpuFilterRWLock[NDSDisplayID_Touch][1], NULL); // Set up VBOs - glGenBuffersARB(1, &_vboVertexID); - glGenBuffersARB(1, &_vboTexCoordID); + glGenBuffers(1, &_vboVertexID); + glGenBuffers(1, &_vboTexCoordID); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * (4 * 8), NULL, GL_STATIC_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, sizeof(GLfloat) * (4 * 8), NULL, GL_STREAM_DRAW_ARB); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, 0); + glBindBuffer(GL_ARRAY_BUFFER, _vboVertexID); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * (4 * 8), NULL, GL_STATIC_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); + glBufferData(GL_ARRAY_BUFFER, sizeof(GLfloat) * (4 * 8), NULL, GL_STREAM_DRAW); + glBindBuffer(GL_ARRAY_BUFFER, 0); // Set up VAO glGenVertexArraysDESMUME(1, &_vaoMainStatesID); @@ -6740,9 +6740,9 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO) if (this->_output->GetContextInfo()->IsShaderSupported()) { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboVertexID); glVertexAttribPointer(OGLVertexAttributeID_Position, 2, GL_FLOAT, GL_FALSE, 0, NULL); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); glVertexAttribPointer(OGLVertexAttributeID_TexCoord0, 2, GL_FLOAT, GL_FALSE, 0, NULL); glEnableVertexAttribArray(OGLVertexAttributeID_Position); @@ -6750,9 +6750,9 @@ OGLDisplayLayer::OGLDisplayLayer(OGLVideoOutput *oglVO) } else { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboVertexID); + glBindBuffer(GL_ARRAY_BUFFER, _vboVertexID); glVertexPointer(2, GL_FLOAT, 0, 
NULL); - glBindBufferARB(GL_ARRAY_BUFFER_ARB, _vboTexCoordID); + glBindBuffer(GL_ARRAY_BUFFER, _vboTexCoordID); glTexCoordPointer(2, GL_FLOAT, 0, NULL); glEnableClientState(GL_VERTEX_ARRAY); @@ -6816,8 +6816,8 @@ OGLDisplayLayer::~OGLDisplayLayer() glDeleteVertexArraysDESMUME(1, &this->_vaoMainStatesID); } - glDeleteBuffersARB(1, &this->_vboVertexID); - glDeleteBuffersARB(1, &this->_vboTexCoordID); + glDeleteBuffers(1, &this->_vboVertexID); + glDeleteBuffers(1, &this->_vboTexCoordID); if (_output->GetContextInfo()->IsShaderSupported()) { @@ -6863,10 +6863,10 @@ void OGLDisplayLayer::_UpdateRotationScaleOGL() void OGLDisplayLayer::_UpdateVerticesOGL() { - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboVertexID); - float *vtxBufferPtr = (float *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + glBindBuffer(GL_ARRAY_BUFFER, this->_vboVertexID); + float *vtxBufferPtr = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); this->_output->SetScreenVertices(vtxBufferPtr); - glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glUnmapBuffer(GL_ARRAY_BUFFER); this->_needUpdateVertices = false; } @@ -7480,15 +7480,15 @@ void OGLDisplayLayer::ProcessOGL() } // Update the texture coordinates - glBindBufferARB(GL_ARRAY_BUFFER_ARB, this->_vboTexCoordID); - glBufferDataARB(GL_ARRAY_BUFFER_ARB, (4 * 8) * sizeof(GLfloat), NULL, GL_STREAM_DRAW_ARB); - float *texCoordPtr = (float *)glMapBufferARB(GL_ARRAY_BUFFER_ARB, GL_WRITE_ONLY_ARB); + glBindBuffer(GL_ARRAY_BUFFER, this->_vboTexCoordID); + glBufferData(GL_ARRAY_BUFFER, (4 * 8) * sizeof(GLfloat), NULL, GL_STREAM_DRAW); + float *texCoordPtr = (float *)glMapBuffer(GL_ARRAY_BUFFER, GL_WRITE_ONLY_ARB); this->_output->SetScreenTextureCoordinates((float)width[NDSDisplayID_Main], (float)height[NDSDisplayID_Main], (float)width[NDSDisplayID_Touch], (float)height[NDSDisplayID_Touch], texCoordPtr); - glUnmapBufferARB(GL_ARRAY_BUFFER_ARB); + glUnmapBuffer(GL_ARRAY_BUFFER); // OpenGL shader-based filters can modify the viewport, so it needs to 
be reset here. glViewport(0, 0, this->_output->GetViewportWidth(), this->_output->GetViewportHeight()); diff --git a/desmume/src/frontend/cocoa/OGLDisplayOutput.h b/desmume/src/frontend/cocoa/OGLDisplayOutput.h index 5136d88e5..07fbd59cc 100644 --- a/desmume/src/frontend/cocoa/OGLDisplayOutput.h +++ b/desmume/src/frontend/cocoa/OGLDisplayOutput.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2014-2022 DeSmuME team + Copyright (C) 2014-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,16 +18,18 @@ #ifndef _OGLDISPLAYOUTPUT_H_ #define _OGLDISPLAYOUTPUT_H_ -#ifndef _OGLDISPLAYOUTPUT_3_2_H_ - #if defined(__APPLE__) - #include - #include + #ifdef _OGLDISPLAYOUTPUT_3_2_H_ + #include + #include + #else + #include + #include + #endif + #include #endif -#endif // _OGLDISPLAYOUTPUT_3_2_H_ - #include #include #include @@ -47,7 +49,7 @@ enum ShaderSupportTier ShaderSupport_MidTier = 3, ShaderSupport_HighTier = 4, ShaderSupport_TopTier = 5, - ShaderSupport_FutureTier = 6, + ShaderSupport_FutureTier = 6 }; struct OGLProcessedFrameInfo diff --git a/desmume/src/frontend/cocoa/OGLDisplayOutput_3_2.h b/desmume/src/frontend/cocoa/OGLDisplayOutput_3_2.h index a76fe75e4..fb3107a9a 100644 --- a/desmume/src/frontend/cocoa/OGLDisplayOutput_3_2.h +++ b/desmume/src/frontend/cocoa/OGLDisplayOutput_3_2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2015 DeSmuME team + Copyright (C) 2015-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -18,12 +18,6 @@ #ifndef _OGLDISPLAYOUTPUT_3_2_H_ #define _OGLDISPLAYOUTPUT_3_2_H_ -#if defined(__APPLE__) - #include - #include - #include -#endif - #include "OGLDisplayOutput.h" class OGLContextInfo_3_2 : public OGLContextInfo diff --git a/desmume/src/frontend/cocoa/cocoa_rom.mm b/desmume/src/frontend/cocoa/cocoa_rom.mm index be5091bf2..c9094bca7 100644 --- 
a/desmume/src/frontend/cocoa/cocoa_rom.mm +++ b/desmume/src/frontend/cocoa/cocoa_rom.mm @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2011-2022 DeSmuME team + Copyright (C) 2011-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -674,7 +674,7 @@ void RomIconToRGBA8888(uint32_t *bitmapData) // The first entry always represents the alpha, so just set it to 0. const uint16_t *clut4 = (uint16_t *)ndsRomBanner.palette; CACHE_ALIGN uint32_t clut32[16]; - ColorspaceConvertBuffer555To8888Opaque(clut4, clut32, 16); + ColorspaceConvertBuffer555xTo8888Opaque(clut4, clut32, 16); clut32[0] = 0x00000000; // Load the image from the icon pixel data. diff --git a/desmume/src/frontend/cocoa/cocoa_videofilter.mm b/desmume/src/frontend/cocoa/cocoa_videofilter.mm index 82a16a6f5..0ba9a7293 100644 --- a/desmume/src/frontend/cocoa/cocoa_videofilter.mm +++ b/desmume/src/frontend/cocoa/cocoa_videofilter.mm @@ -1,6 +1,6 @@ /* Copyright (C) 2011 Roger Manuel - Copyright (C) 2013 DeSmuME team + Copyright (C) 2013-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -139,7 +139,7 @@ - (NSBitmapImageRep *) bitmapImageRep } uint32_t *bitmapData = (uint32_t *)[imageRep bitmapData]; - ColorspaceConvertBuffer888XTo8888Opaque((const uint32_t *)[self runFilter], bitmapData, w * h); + ColorspaceConvertBuffer888xTo8888Opaque((const uint32_t *)[self runFilter], bitmapData, w * h); #ifdef MSB_FIRST for (size_t i = 0; i < w * h; i++) diff --git a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm index e83459aff..7a9942393 100644 --- a/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm +++ b/desmume/src/frontend/cocoa/userinterface/MacMetalDisplayView.mm @@ -1,5 +1,5 @@ /* - Copyright (C) 
2017-2023 DeSmuME team + Copyright (C) 2017-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2556,7 +2556,7 @@ - (void) display GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]); pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); - ColorspaceConvertBuffer555To8888Opaque(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer555xTo8888Opaque(this->_fetchDisplayInfo[bufferIndex].nativeBuffer16[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); } @@ -2570,7 +2570,7 @@ - (void) display GPU->PostprocessDisplay(displayID, this->_fetchDisplayInfo[bufferIndex]); pthread_rwlock_wrlock(&this->_srcCloneRWLock[displayID][bufferIndex]); - ColorspaceConvertBuffer888XTo8888Opaque((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); + ColorspaceConvertBuffer888xTo8888Opaque((u32 *)this->_fetchDisplayInfo[bufferIndex].customBuffer[displayID], this->_srcNativeClone[displayID][bufferIndex], GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT); pthread_rwlock_unlock(&this->_srcCloneRWLock[displayID][bufferIndex]); } diff --git a/desmume/src/frontend/posix/gtk/main.cpp b/desmume/src/frontend/posix/gtk/main.cpp index 898a19b6b..0e0cb7573 100644 --- a/desmume/src/frontend/posix/gtk/main.cpp +++ b/desmume/src/frontend/posix/gtk/main.cpp @@ -1396,7 +1396,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo static inline void gpu_screen_to_rgb(u32* dst) { - 
ColorspaceConvertBuffer555To8888Opaque(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, + ColorspaceConvertBuffer555xTo8888Opaque(GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, dst, real_framebuffer_width * real_framebuffer_height * 2); } @@ -1607,7 +1607,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo } static void RedrawScreen() { - ColorspaceConvertBuffer555To8888Opaque( + ColorspaceConvertBuffer555xTo8888Opaque( GPU->GetDisplayInfo().isCustomSizeRequested ? (u16*)(GPU->GetDisplayInfo().masterCustomBuffer) : GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), real_framebuffer_width * real_framebuffer_height * 2); #ifdef HAVE_LIBAGG diff --git a/desmume/src/frontend/posix/gtk2/main.cpp b/desmume/src/frontend/posix/gtk2/main.cpp index 70946a8e7..cee54cd60 100644 --- a/desmume/src/frontend/posix/gtk2/main.cpp +++ b/desmume/src/frontend/posix/gtk2/main.cpp @@ -1666,7 +1666,7 @@ static int ConfigureDrawingArea(GtkWidget *widget, GdkEventConfigure *event, gpo static inline void gpu_screen_to_rgb(u32* dst) { - ColorspaceConvertBuffer555To8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); + ColorspaceConvertBuffer555xTo8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, dst, GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); } static inline void drawScreen(cairo_t* cr, u32* buf, gint w, gint h) { @@ -1791,7 +1791,7 @@ static gboolean ExposeDrawingArea (GtkWidget *widget, GdkEventExpose *event, gpo } static void RedrawScreen() { - ColorspaceConvertBuffer555To8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); + 
ColorspaceConvertBuffer555xTo8888Opaque(GPU->GetDisplayInfo().masterNativeBuffer16, (uint32_t *)video->GetSrcBufferPtr(), GPU_FRAMEBUFFER_NATIVE_WIDTH * GPU_FRAMEBUFFER_NATIVE_HEIGHT * 2); #ifdef HAVE_LIBAGG aggDraw.hud->attach((u8*)video->GetSrcBufferPtr(), 256, 384, 1024); osd->update(); diff --git a/desmume/src/frontend/windows/aviout.cpp b/desmume/src/frontend/windows/aviout.cpp index 3a5aed7ec..39be5c383 100644 --- a/desmume/src/frontend/windows/aviout.cpp +++ b/desmume/src/frontend/windows/aviout.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2006-2018 DeSmuME team + Copyright (C) 2006-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -736,7 +736,7 @@ void NDSCaptureObject::ConvertVideoSlice555Xto888(const VideoConvertParam ¶m for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++) { - ColorspaceConvertBuffer555XTo888(src, dst, param.frameWidth); + ColorspaceConvertBuffer555xTo888(src, dst, param.frameWidth); src += param.frameWidth; dst -= param.frameWidth * 3; } @@ -750,7 +750,7 @@ void NDSCaptureObject::ConvertVideoSlice888Xto888(const VideoConvertParam ¶m for (size_t y = param.firstLineIndex; y <= param.lastLineIndex; y++) { - ColorspaceConvertBuffer888XTo888(src, dst, param.frameWidth); + ColorspaceConvertBuffer888xTo888(src, dst, param.frameWidth); src += param.frameWidth; dst -= param.frameWidth * 3; } diff --git a/desmume/src/frontend/windows/display.cpp b/desmume/src/frontend/windows/display.cpp index 143136083..4e4d8b329 100644 --- a/desmume/src/frontend/windows/display.cpp +++ b/desmume/src/frontend/windows/display.cpp @@ -1,5 +1,5 @@ /* -Copyright (C) 2018 DeSmuME team +Copyright (C) 2018-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -683,9 +683,9 @@ void DoDisplay() //we have to do a copy here because we're about to draw the OSD 
onto it. bummer. if (gpu_bpp == 15) - ColorspaceConvertBuffer555To8888Opaque((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); + ColorspaceConvertBuffer555xTo8888Opaque((u16 *)video.srcBuffer, video.buffer, video.srcBufferSize / 2); else - ColorspaceConvertBuffer888XTo8888Opaque((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4); + ColorspaceConvertBuffer888xTo8888Opaque((u32*)video.srcBuffer, video.buffer, video.srcBufferSize / 4); //some games use the backlight for fading effects const size_t pixCount = video.prefilterWidth * video.prefilterHeight / 2; diff --git a/desmume/src/frontend/windows/hotkey.cpp b/desmume/src/frontend/windows/hotkey.cpp index 88e6be856..fb37bd2c8 100644 --- a/desmume/src/frontend/windows/hotkey.cpp +++ b/desmume/src/frontend/windows/hotkey.cpp @@ -3,7 +3,7 @@ licensed under the terms supplied at the end of this file (for the terms are very long!) Differences from that baseline version are: - Copyright (C) 2009-2019 DeSmuME team + Copyright (C) 2009-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -174,7 +174,7 @@ static void DoScreenshot(const char* fname) else { u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4); - ColorspaceConvertBuffer888XTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); + ColorspaceConvertBuffer888xTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); NDS_WritePNG_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight*2, swapbuf, fname); free_aligned(swapbuf); } @@ -189,7 +189,7 @@ static void DoScreenshot(const char* fname) else { u32* swapbuf = (u32*)malloc_alignedCacheLine(dispInfo.customWidth * dispInfo.customHeight * 2 * 4); - ColorspaceConvertBuffer888XTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, 
swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); + ColorspaceConvertBuffer888xTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, dispInfo.customWidth * dispInfo.customHeight * 2); NDS_WriteBMP_32bppBuffer(dispInfo.customWidth, dispInfo.customHeight *2, swapbuf, fname); free_aligned(swapbuf); } diff --git a/desmume/src/frontend/windows/main.cpp b/desmume/src/frontend/windows/main.cpp index 7766d3a47..796c3349b 100644 --- a/desmume/src/frontend/windows/main.cpp +++ b/desmume/src/frontend/windows/main.cpp @@ -3441,7 +3441,7 @@ void ScreenshotToClipboard(bool extraInfo) else { u32* swapbuf = (u32*)malloc_alignedPage(width*height * 4); - ColorspaceConvertBuffer888XTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height); + ColorspaceConvertBuffer888xTo8888Opaque((const u32*)dispInfo.masterCustomBuffer, swapbuf, width * height); SetDIBitsToDevice(hMemDC, 0, 0, width, height, 0, 0, 0, height, swapbuf, (BITMAPINFO*)&bmi, DIB_RGB_COLORS); diff --git a/desmume/src/frontend/windows/ogl.cpp b/desmume/src/frontend/windows/ogl.cpp index 9c14c56da..053ff365d 100644 --- a/desmume/src/frontend/windows/ogl.cpp +++ b/desmume/src/frontend/windows/ogl.cpp @@ -110,7 +110,7 @@ static bool wgl_beginOpenGL() return true; } -static bool wgl_endOpenGL() +static void wgl_endOpenGL() { // Do nothing. 
} diff --git a/desmume/src/rasterize.cpp b/desmume/src/rasterize.cpp index 64684b26a..fa7458cf1 100644 --- a/desmume/src/rasterize.cpp +++ b/desmume/src/rasterize.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2009-2023 DeSmuME team + Copyright (C) 2009-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -2032,7 +2032,7 @@ Render3DError SoftRasterizerRenderer::BeginRender(const GFX3D_State &renderState } // Convert the toon table colors - ColorspaceConvertBuffer555To6665Opaque(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32); + ColorspaceConvertBuffer555xTo6665Opaque(renderState.toonTable16, (u32 *)this->toonColor32LUT, 32); if (this->_enableEdgeMark) { diff --git a/desmume/src/texcache.cpp b/desmume/src/texcache.cpp index faafd5574..692cd5504 100644 --- a/desmume/src/texcache.cpp +++ b/desmume/src/texcache.cpp @@ -1,7 +1,7 @@ /* Copyright (C) 2006 yopyop Copyright (C) 2006-2007 shash - Copyright (C) 2008-2023 DeSmuME team + Copyright (C) 2008-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -867,13 +867,13 @@ void __NDSTextureUnpackI2_AVX2(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor0, convertedColor[0], 
convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. @@ -923,13 +923,13 @@ void __NDSTextureUnpackI2_SSSE3(const size_t texelCount, const u8 *__restrict sr if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. 
@@ -977,13 +977,13 @@ void __NDSTextureUnpackI2_NEON(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } else { - ColorspaceConvert555To8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } // Set converted colors to 0 if the palette index is 0. @@ -1028,13 +1028,13 @@ void __NDSTextureUnpackI2_AltiVec(const size_t texelCount, const u8 *__restrict if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } else { - ColorspaceConvert555To8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } // Set converted colors to 0 if the palette index is 0. 
@@ -1146,13 +1146,13 @@ void __NDSTextureUnpackI4_AVX2(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. @@ -1208,13 +1208,13 @@ void __NDSTextureUnpackI4_SSSE3(const size_t texelCount, const u8 *__restrict sr if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } else { - ColorspaceConvert555To8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor0, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(palColor1, convertedColor[2], convertedColor[3]); } // Set converted colors to 0 if the palette index is 0. 
@@ -1261,13 +1261,13 @@ void __NDSTextureUnpackI4_NEON(const size_t texelCount, const u8 *__restrict src if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } else { - ColorspaceConvert555To8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor0, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(palColor1, convertedColor.val[2], convertedColor.val[3]); } // Set converted colors to 0 if the palette index is 0. @@ -1312,13 +1312,13 @@ void __NDSTextureUnpackI4_AltiVec(const size_t texelCount, const u8 *__restrict if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo6665Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } else { - ColorspaceConvert555To8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor0, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo8888Opaque_AltiVec(palColor1, convertedColor[3], convertedColor[2]); } // Set converted colors to 0 if the palette index is 0. 
@@ -1434,13 +1434,13 @@ void __NDSTextureUnpackA3I5_NEON(const size_t texelCount, const u8 *__restrict s if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } else { - ColorspaceConvert555To8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } vst1q_u32_x4(dstBuffer + i, convertedColor); @@ -1486,13 +1486,13 @@ void __NDSTextureUnpackA3I5_AltiVec(const size_t texelCount, const u8 *__restric if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } else { - ColorspaceConvert555To8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } vec_st(convertedColor[0], 0, dstBuffer); @@ -1566,8 
+1566,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha); - ColorspaceConvert555To6665_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo6665_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo6665_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } else { @@ -1577,8 +1577,8 @@ void __NDSTextureUnpackA5I3_AVX2(const size_t texelCount, const u8 *__restrict s const v256u16 alphaLo = _mm256_unpacklo_epi8(_mm256_setzero_si256(), alpha); const v256u16 alphaHi = _mm256_unpackhi_epi8(_mm256_setzero_si256(), alpha); - ColorspaceConvert555To8888_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo8888_AVX2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo8888_AVX2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } _mm256_store_si256((v256u32 *)dstBuffer + 0, convertedColor[0]); @@ -1615,8 +1615,8 @@ void __NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - ColorspaceConvert555To6665_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To6665_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo6665_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo6665_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } else { @@ -1624,8 +1624,8 @@ void 
__NDSTextureUnpackA5I3_SSSE3(const size_t texelCount, const u8 *__restrict const v128u16 alphaLo = _mm_unpacklo_epi8(_mm_setzero_si128(), alpha); const v128u16 alphaHi = _mm_unpackhi_epi8(_mm_setzero_si128(), alpha); - ColorspaceConvert555To8888_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); - ColorspaceConvert555To8888_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); + ColorspaceConvert555aTo8888_SSE2(palColor0, alphaLo, convertedColor[0], convertedColor[1]); + ColorspaceConvert555aTo8888_SSE2(palColor1, alphaHi, convertedColor[2], convertedColor[3]); } _mm_store_si128((v128u32 *)(dstBuffer + i) + 0, convertedColor[0]); @@ -1661,8 +1661,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) ); - ColorspaceConvert555To6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo6665_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo6665_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); } else { @@ -1670,8 +1670,8 @@ void __NDSTextureUnpackA5I3_NEON(const size_t texelCount, const u8 *__restrict s const v128u16 alphaLo = vreinterpretq_u16_u8( vzip1q_u8(vdupq_n_u8(0), alpha) ); const v128u16 alphaHi = vreinterpretq_u16_u8( vzip2q_u8(vdupq_n_u8(0), alpha) ); - ColorspaceConvert555To8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); - ColorspaceConvert555To8888_NEON(palColor1, alphaHi, convertedColor.val[2], convertedColor.val[3]); + ColorspaceConvert555aTo8888_NEON(palColor0, alphaLo, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555aTo8888_NEON(palColor1, alphaHi, convertedColor.val[2], 
convertedColor.val[3]); } vst1q_u32_x4(dstBuffer + i, convertedColor); @@ -1707,8 +1707,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) ); const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) ); - ColorspaceConvert555To6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo6665_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo6665_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } else { @@ -1716,8 +1716,8 @@ void __NDSTextureUnpackA5I3_AltiVec(const size_t texelCount, const u8 *__restric const v128u16 alphaLo = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x04,0x10,0x05,0x10,0x06,0x10,0x07, 0x10,0x00,0x10,0x01,0x10,0x02,0x10,0x03}) ); const v128u16 alphaHi = vec_perm( (v128u8)alpha, ((v128u8){0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}), ((v128u8){0x10,0x0C,0x10,0x0D,0x10,0x0E,0x10,0x0F, 0x10,0x08,0x10,0x09,0x10,0x0A,0x10,0x0B}) ); - ColorspaceConvert555To8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); - ColorspaceConvert555To8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); + ColorspaceConvert555aTo8888_AltiVec(palColor0, alphaLo, convertedColor[1], convertedColor[0]); + ColorspaceConvert555aTo8888_AltiVec(palColor1, alphaHi, convertedColor[3], convertedColor[2]); } vec_st(convertedColor[0], 0, dstBuffer); @@ -1900,11 +1900,11 @@ void __NDSTextureUnpackDirect16Bit_AVX2(const size_t texelCount, const u16 *__re if (TEXCACHEFORMAT == 
TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AVX2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_AVX2(c, convertedColor[0], convertedColor[1]); } else { - ColorspaceConvert555To8888Opaque_AVX2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_AVX2(c, convertedColor[0], convertedColor[1]); } v256u16 alpha = _mm256_cmpeq_epi16(_mm256_srli_epi16(c, 15), _mm256_set1_epi16(1)); @@ -1930,11 +1930,11 @@ void __NDSTextureUnpackDirect16Bit_SSE2(const size_t texelCount, const u16 *__re if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_SSE2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo6665Opaque_SSE2(c, convertedColor[0], convertedColor[1]); } else { - ColorspaceConvert555To8888Opaque_SSE2(c, convertedColor[0], convertedColor[1]); + ColorspaceConvert555xTo8888Opaque_SSE2(c, convertedColor[0], convertedColor[1]); } const v128u16 alpha = _mm_cmpeq_epi16(_mm_srli_epi16(c, 15), _mm_set1_epi16(1)); @@ -1959,11 +1959,11 @@ void __NDSTextureUnpackDirect16Bit_NEON(const size_t texelCount, const u16 *__re if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo6665Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); } else { - ColorspaceConvert555To8888Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); + ColorspaceConvert555xTo8888Opaque_NEON(c, convertedColor.val[0], convertedColor.val[1]); } const v128u16 alpha = vceqq_u16(vshrq_n_u16(c,15), vdupq_n_u16(1)); @@ -1987,11 +1987,11 @@ void __NDSTextureUnpackDirect16Bit_AltiVec(const size_t texelCount, const u16 *_ if (TEXCACHEFORMAT == TexFormat_15bpp) { - ColorspaceConvert555To6665Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); + ColorspaceConvert555xTo6665Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); } else { - ColorspaceConvert555To8888Opaque_AltiVec(c, convertedColor[1], 
convertedColor[0]); + ColorspaceConvert555xTo8888Opaque_AltiVec(c, convertedColor[1], convertedColor[0]); } const v128u16 alpha = vec_and(c, ((v128u16){0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080,0x0080})); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp index 9704845be..c5034f845 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2023 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -187,7 +187,7 @@ void ColorspaceHandlerInit() } template -void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -198,22 +198,22 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To8888Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To8888Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo8888Opaque(src, dst, pixCountVector); } } @@ -243,7 +243,7 @@ void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__re } template -void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict 
src, u32 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -254,22 +254,22 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555To6665Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555To6665Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo6665Opaque(src, dst, pixCountVector); } } @@ -298,6 +298,119 @@ void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__re } } + +template +void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +{ + size_t i = 0; + +#ifdef USEMANUALVECTORIZATION + const size_t pixCountVector = pixCount - (pixCount % (VECTORSIZE / sizeof(u16))); + + if (SWAP_RB) + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To8888_SwapRB_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To8888_SwapRB(src, dst, pixCountVector); + } + } + else + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To8888_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To8888(src, dst, pixCountVector); + } + } + +#pragma LOOPVECTORIZE_DISABLE +#endif // USEMANUALVECTORIZATION + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To8888(src[i]); + break; + + case BESwapIn: + dst[i] = 
ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(src[i]) ); + break; + + case BESwapInOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])) ); + break; + } + } +} + +template +void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) +{ + size_t i = 0; + +#ifdef USEMANUALVECTORIZATION + const size_t pixCountVector = pixCount - (pixCount % (VECTORSIZE / sizeof(u16))); + + if (SWAP_RB) + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To6665_SwapRB_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To6665_SwapRB(src, dst, pixCountVector); + } + } + else + { + if (IS_UNALIGNED) + { + i = csh.ConvertBuffer5551To6665_IsUnaligned(src, dst, pixCountVector); + } + else + { + i = csh.ConvertBuffer5551To6665(src, dst, pixCountVector); + } + } + +#pragma LOOPVECTORIZE_DISABLE +#endif // USEMANUALVECTORIZATION + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To6665(src[i]); + break; + + case BESwapIn: + dst[i] = ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(src[i]) ); + break; + + case BESwapInOut: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])) ); + break; + } + } +} + template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) { @@ -455,7 +568,7 @@ void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restric } template -void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) +void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) { size_t i = 0; @@ -466,22 +579,22 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi { if (IS_UNALIGNED) { - i = 
csh.ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo8888Opaque_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo8888Opaque(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo8888Opaque(src, dst, pixCountVector); } } @@ -494,7 +607,7 @@ void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pi } template -void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -505,22 +618,22 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555XTo888_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer555XTo888_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer555XTo888(src, dst, pixCountVector); + i = csh.ConvertBuffer555xTo888(src, dst, pixCountVector); } } @@ -533,7 +646,7 @@ void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict } template -void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) +void ColorspaceConvertBuffer888xTo888(const u32 
*__restrict src, u8 *__restrict dst, size_t pixCount) { size_t i = 0; @@ -544,22 +657,22 @@ void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888_SwapRB_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo888_SwapRB(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888_SwapRB(src, dst, pixCountVector); } } else { if (IS_UNALIGNED) { - i = csh.ConvertBuffer888XTo888_IsUnaligned(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888_IsUnaligned(src, dst, pixCountVector); } else { - i = csh.ConvertBuffer888XTo888(src, dst, pixCountVector); + i = csh.ConvertBuffer888xTo888(src, dst, pixCountVector); } } @@ -811,7 +924,7 @@ void ColorspaceApplyIntensityToBuffer32(u32 *dst, size_t pixCount, float intensi } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -841,7 +954,7 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque(const u16 *__restrict src } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -871,19 +984,19 @@ size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restr } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return 
this->ColorspaceHandler::ConvertBuffer555To8888Opaque(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque(src, dst, pixCount); } template -size_t ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To8888Opaque_SwapRB(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer555xTo8888Opaque_SwapRB(src, dst, pixCount); } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -913,7 +1026,7 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque(const u16 *__restrict src } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -943,15 +1056,159 @@ size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restr } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return 
this->ColorspaceHandler::ConvertBuffer555xTo6665Opaque_SwapRB(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To8888(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To8888(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To8888(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer5551To8888(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return this->ColorspaceHandler::ConvertBuffer5551To8888_SwapRB(src, dst, pixCount); +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch 
(BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To6665(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + size_t i = 0; + + for (; i < pixCount; i++) + { + switch (BE_BYTESWAP) + { + case BESwapNone: + dst[i] = ColorspaceConvert5551To6665(src[i]); + break; + + case BESwapSrc: + dst[i] = ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])); + break; + + case BESwapDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(src[i]) ); + break; + + case BESwapSrcDst: + dst[i] = LE_TO_LOCAL_32( ColorspaceConvert5551To6665(LE_TO_LOCAL_16(src[i])) ); + break; + } + } + + return i; +} + +template +size_t ColorspaceHandler::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To6665Opaque(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer5551To6665(src, dst, pixCount); } template -size_t ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return this->ColorspaceHandler::ConvertBuffer555To6665Opaque_SwapRB(src, dst, pixCount); + return this->ColorspaceHandler::ConvertBuffer5551To6665_SwapRB(src, dst, pixCount); } size_t ColorspaceHandler::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -1090,7 +1347,7 @@ size_t 
ColorspaceHandler::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 * return this->ColorspaceHandler::ConvertBuffer6665To5551_SwapRB(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { size_t i = 0; @@ -1102,7 +1359,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { size_t i = 0; @@ -1114,17 +1371,17 @@ size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return this->ConvertBuffer888XTo8888Opaque(src, dst, pixCount); + return this->ConvertBuffer888xTo8888Opaque(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return this->ConvertBuffer888XTo8888Opaque_SwapRB(src, dst, pixCount); + return this->ConvertBuffer888xTo8888Opaque_SwapRB(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1136,7 +1393,7 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888(const u16 *__restrict src, u8 * return 
i; } -size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1148,17 +1405,17 @@ size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict sr return i; } -size_t ColorspaceHandler::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer555XTo888(src, dst, pixCount); + return this->ConvertBuffer555xTo888(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer555XTo888_SwapRB(src, dst, pixCount); + return this->ConvertBuffer555xTo888_SwapRB(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1170,7 +1427,7 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888(const u32 *__restrict src, u8 * return i; } -size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { size_t i = 0; @@ -1182,14 +1439,14 @@ size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict sr return i; } -size_t 
ColorspaceHandler::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer888XTo888(src, dst, pixCount); + return this->ConvertBuffer888xTo888(src, dst, pixCount); } -size_t ColorspaceHandler::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return this->ConvertBuffer888XTo888_SwapRB(src, dst, pixCount); + return this->ConvertBuffer888xTo888_SwapRB(src, dst, pixCount); } size_t ColorspaceHandler::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -1396,39 +1653,73 @@ size_t ColorspaceHandler::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 *dst, return this->ApplyIntensityToBuffer32_SwapRB(dst, pixCount, intensity); } -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 
*__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); - -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void 
ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 
*__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); + +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void 
ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); + +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void 
ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); + +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const 
u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); @@ -1450,20 +1741,20 @@ template void ColorspaceConvertBuffer6665To5551(const u32 *__restri template void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict 
src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); template void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler.h b/desmume/src/utils/colorspacehandler/colorspacehandler.h index 48bae72e8..69bd913d5 100644 --- 
a/desmume/src/utils/colorspacehandler/colorspacehandler.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2023 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -126,6 +126,26 @@ FORCEINLINE u32 ColorspaceConvert555To6665Opaque(const u16 src) return (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); } +template +FORCEINLINE u32 ColorspaceConvert5551To8888(const u16 src) +{ + Color4u8 outColor; + outColor.value = (SWAP_RB) ? COLOR555TO8888_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO8888_OPAQUE(src & 0x7FFF); + outColor.a = (src & 0x8000) ? 0xFF : 0x00; + + return outColor.value; +} + +template +FORCEINLINE u32 ColorspaceConvert5551To6665(const u16 src) +{ + Color4u8 outColor; + outColor.value = (SWAP_RB) ? COLOR555TO6665_OPAQUE_SWAP_RB(src & 0x7FFF) : COLOR555TO6665_OPAQUE(src & 0x7FFF); + outColor.a = (src & 0x8000) ? 
0x1F : 0x00; + + return outColor.value; +} + template FORCEINLINE u32 ColorspaceConvert8888To6665(Color4u8 srcColor) { @@ -331,16 +351,18 @@ FORCEINLINE u32 ColorspaceApplyIntensity32(u32 srcColor, float intensity) return ColorspaceApplyIntensity32(srcColorComponent); } -template void ColorspaceConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer6665To8888(const u32 *src, u32 *dst, size_t pixCount); template void ColorspaceConvertBuffer8888To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); template void ColorspaceConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount); -template void ColorspaceConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); -template void ColorspaceConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount); +template void ColorspaceConvertBuffer888xTo888(const u32 
*__restrict src, u8 *__restrict dst, size_t pixCount); template void ColorspaceCopyBuffer16(const u16 *src, u16 *dst, size_t pixCount); template void ColorspaceCopyBuffer32(const u32 *src, u32 *dst, size_t pixCount); @@ -353,15 +375,25 @@ class ColorspaceHandler public: ColorspaceHandler() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -383,20 +415,20 @@ class ColorspaceHandler size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) 
const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const 
u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp index e35747f34..378015905 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include template -FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -64,7 +64,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -101,7 +101,7 @@ 
FORCEINLINE void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u } template -FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -141,7 +141,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -178,17 +178,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0xFF00); - ColorspaceConvert555To8888_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) { const v256u16 srcAlphaBits16 = _mm256_set1_epi16(0x1F00); - ColorspaceConvert555To6665_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_AVX2(const v256u16 
&srcColor, v256u32 &dstLo, v256u32 &dstHi) +{ + const v256u16 srcAlphaBits16 = _mm256_and_si256( _mm256_cmpgt_epi16(srcColor, _mm256_set1_epi16(0xFFFF)), _mm256_set1_epi16(0xFF00) ); + ColorspaceConvert555aTo8888_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi) +{ + const v256u16 srcAlphaBits16 = _mm256_and_si256( _mm256_cmpgt_epi16(srcColor, _mm256_set1_epi16(0xFFFF)), _mm256_set1_epi16(0x1F00) ); + ColorspaceConvert555aTo6665_AVX2(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -320,7 +334,7 @@ FORCEINLINE v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const } template -FORCEINLINE v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src) +FORCEINLINE v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src) { if (SWAP_RB) { @@ -407,7 +421,59 @@ FORCEINLINE v256u32 ColorspaceApplyIntensity32_AVX2(const v256u32 &src, float in } template -static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) +{ + size_t i = 0; + + for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16))) + { + v256u16 src_vec256 = (IS_UNALIGNED) ? 
_mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); + v256u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert555xTo8888Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) +{ + size_t i = 0; + + for (; i < pixCountVec256; i+=(sizeof(v256u16)/sizeof(u16))) + { + v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); + v256u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert555xTo6665Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_storeu_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm256_store_si256((v256u32 *)(dst+i+(sizeof(v256u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_AVX2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec256) { size_t i = 0; @@ -415,7 +481,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict { v256u16 src_vec256 = (IS_UNALIGNED) ? 
_mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); v256u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + ColorspaceConvert5551To8888_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -433,7 +499,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX2(const u16 *__restrict } template -size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer5551To6665_AVX2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec256) { size_t i = 0; @@ -441,7 +507,7 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX2(const u16 *__restrict src, u3 { v256u16 src_vec256 = (IS_UNALIGNED) ? _mm256_loadu_si256((v256u16 *)(src+i)) : _mm256_load_si256((v256u16 *)(src+i)); v256u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); + ColorspaceConvert5551To6665_AVX2(src_vec256, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -539,7 +605,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX2(const u32 *__restrict src, u16 *__ } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX2(const u32 *src, u32 *dst, size_t pixCountVec256) { size_t i = 0; @@ -547,11 +613,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si { if (IS_UNALIGNED) { - _mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2(_mm256_loadu_si256((v256u32 *)(src+i))) ); + _mm256_storeu_si256( (v256u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX2(_mm256_loadu_si256((v256u32 *)(src+i))) ); } else { - _mm256_store_si256( (v256u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX2(_mm256_load_si256((v256u32 *)(src+i))) ); + _mm256_store_si256( (v256u32 *)(dst+i), 
ColorspaceConvert888xTo8888Opaque_AVX2(_mm256_load_si256((v256u32 *)(src+i))) ); } } @@ -559,7 +625,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX2(const u32 *src, u32 *dst, si } template -size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer555xTo888_AVX2(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) { size_t i = 0; v256u16 src_v256u16[2]; @@ -636,7 +702,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX2(const u16 *__restrict src, u8 *__re } template -size_t ColorspaceConvertBuffer888XTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) +size_t ColorspaceConvertBuffer888xTo888_AVX2(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec256) { size_t i = 0; v256u32 src_v256u32[4]; @@ -905,51 +971,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX2(u32 *dst, size_t pixCountVec256, } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return 
ColorspaceConvertBuffer555xTo8888Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo6665Opaque_AVX2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, 
size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); } template -size_t 
ColorspaceHandler_AVX2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer5551To6665_AVX2(src, dst, pixCount); } size_t ColorspaceHandler_AVX2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -1032,64 +1146,64 @@ size_t ColorspaceHandler_AVX2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const return ColorspaceConvertBuffer6665To5551_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t 
pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888(const 
u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } -size_t ColorspaceHandler_AVX2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX2(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX2(src, dst, pixCount); } size_t ColorspaceHandler_AVX2::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -1152,23 +1266,23 @@ size_t ColorspaceHandler_AVX2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 * return ColorspaceApplyIntensityToBuffer32_AVX2(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template 
void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 
&dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); template v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); @@ -1182,8 +1296,8 @@ template v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, c template v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); template v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); -template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); -template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); +template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src); +template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src); template v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); template v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h index af8f832d7..572f8025e 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or 
modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires AVX2 support. #else -template void ColorspaceConvert555To8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX2(const v256u16 &srcColor, const v256u16 &srcAlphaBits, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX2(const v256u16 &srcColor, v256u32 &dstLo, v256u32 &dstHi); template v256u32 ColorspaceConvert8888To6665_AVX2(const v256u32 &src); template v256u32 ColorspaceConvert6665To8888_AVX2(const v256u32 &src); template v256u16 ColorspaceConvert8888To5551_AVX2(const v256u32 &srcLo, const v256u32 
&srcHi); template v256u16 ColorspaceConvert6665To5551_AVX2(const v256u32 &srcLo, const v256u32 &srcHi); -template v256u32 ColorspaceConvert888XTo8888Opaque_AVX2(const v256u32 &src); +template v256u32 ColorspaceConvert888xTo8888Opaque_AVX2(const v256u32 &src); template v256u16 ColorspaceCopy16_AVX2(const v256u16 &src); template v256u32 ColorspaceCopy32_AVX2(const v256u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_AVX2 : public ColorspaceHandler public: ColorspaceHandler_AVX2() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 
*__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,20 +89,20 @@ class ColorspaceHandler_AVX2 : public ColorspaceHandler size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 
*__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 
*__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp index 713b145ba..0c404687e 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -25,7 +25,7 @@ #include template -FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -44,7 +44,7 @@ FORCEINLINE void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, cons } template -FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) 
+FORCEINLINE void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -62,7 +62,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v51 } template -FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -81,7 +81,7 @@ FORCEINLINE void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, cons } template -FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -99,17 +99,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v51 } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { const v512u16 srcAlphaBits16 = _mm512_set1_epi16(0xFF00); - ColorspaceConvert555To8888_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) { const v512u16 srcAlphaBits16 = 
_mm512_set1_epi16(0x1F00); - ColorspaceConvert555To6665_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +{ + const v512u16 srcAlphaBits16 = _mm512_and_si512( _mm512_cmpgt_epi16(srcColor, _mm512_set1_epi16(0xFFFF)), _mm512_set1_epi16(0xFF00) ); + ColorspaceConvert555aTo8888_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi) +{ + const v512u16 srcAlphaBits16 = _mm512_and_si512( _mm512_cmpgt_epi16(srcColor, _mm512_set1_epi16(0xFFFF)), _mm512_set1_epi16(0x1F00) ); + ColorspaceConvert555aTo6665_AVX512(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -239,7 +253,7 @@ FORCEINLINE v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, con } template -FORCEINLINE v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src) +FORCEINLINE v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src) { if (SWAP_RB) { @@ -326,7 +340,59 @@ FORCEINLINE v512u32 ColorspaceApplyIntensity32_AVX512(const v512u32 &src, float } template -static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) +{ + size_t i = 0; + + for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16))) + { + v512u16 src_vec512 = (IS_UNALIGNED) ? 
_mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); + v512u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert555xTo8888Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +size_t ColorspaceConvertBuffer555xTo6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) +{ + size_t i = 0; + + for (; i < pixCountVec512; i+=(sizeof(v512u16)/sizeof(u16))) + { + v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); + v512u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert555xTo6665Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_storeu_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm512_store_si512((v512u32 *)(dst+i+(sizeof(v512u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_AVX512(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec512) { size_t i = 0; @@ -334,7 +400,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric { v512u16 src_vec512 = (IS_UNALIGNED) ? 
_mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); v512u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + ColorspaceConvert5551To8888_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -352,7 +418,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AVX512(const u16 *__restric } template -size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer5551To6665_AVX512(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec512) { size_t i = 0; @@ -360,7 +426,7 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AVX512(const u16 *__restrict src, { v512u16 src_vec512 = (IS_UNALIGNED) ? _mm512_loadu_si512((v512u16 *)(src+i)) : _mm512_load_si512((v512u16 *)(src+i)); v512u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); + ColorspaceConvert5551To6665_AVX512(src_vec512, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -458,7 +524,7 @@ size_t ColorspaceConvertBuffer6665To5551_AVX512(const u32 *__restrict src, u16 * } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer888xTo8888Opaque_AVX512(const u32 *src, u32 *dst, size_t pixCountVec512) { size_t i = 0; @@ -466,11 +532,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, { if (IS_UNALIGNED) { - _mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512(_mm512_loadu_si512((v512u32 *)(src+i))) ); + _mm512_storeu_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512(_mm512_loadu_si512((v512u32 *)(src+i))) ); } else { - _mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_AVX512(_mm512_load_si512((v512u32 *)(src+i))) ); + 
_mm512_store_si512( (v512u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_AVX512(_mm512_load_si512((v512u32 *)(src+i))) ); } } @@ -478,7 +544,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_AVX512(const u32 *src, u32 *dst, } template -size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer555xTo888_AVX512(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) { size_t i = 0; v512u16 src_v512u16[2]; @@ -572,7 +638,7 @@ size_t ColorspaceConvertBuffer555XTo888_AVX512(const u16 *__restrict src, u8 *__ } template -size_t ColorspaceConvertBuffer888XTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) +size_t ColorspaceConvertBuffer888xTo888_AVX512(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec512) { size_t i = 0; v512u32 src_v512u32[4]; @@ -858,51 +924,99 @@ size_t ColorspaceApplyIntensityToBuffer32_AVX512(u32 *dst, size_t pixCountVec512 } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t 
pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo8888Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_AVX512(src, dst, pixCount); } template 
-size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); } template -size_t ColorspaceHandler_AVX512::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); +} + 
+template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AVX512::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_AVX512(src, dst, pixCount); } size_t ColorspaceHandler_AVX512::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -985,64 +1099,64 @@ size_t ColorspaceHandler_AVX512::ConvertBuffer6665To5551_SwapRB_IsUnaligned(cons return ColorspaceConvertBuffer6665To5551_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t 
ColorspaceHandler_AVX512::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AVX512(src, dst, 
pixCount); + return ColorspaceConvertBuffer555xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } -size_t ColorspaceHandler_AVX512::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AVX512::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AVX512(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AVX512(src, dst, pixCount); } size_t ColorspaceHandler_AVX512::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -1105,23 +1219,29 @@ size_t ColorspaceHandler_AVX512::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 return 
ColorspaceApplyIntensityToBuffer32_AVX512(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); + +template void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); + +template void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 
&srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); template v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); @@ -1135,8 +1255,8 @@ template v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, template v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); template v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); -template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); -template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); +template v512u32 
ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src); +template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src); template v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); template v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h index b04077a66..09283de67 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AVX512.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires AVX-512 Tier-1 support. #else -template void ColorspaceConvert555To8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo888X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555XTo666X_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo8888_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo888x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555aTo6665_AVX512(const v512u16 &srcColor, const v512u16 &srcAlphaBits, v512u32 &dstLo, v512u32 &dstHi); +template 
void ColorspaceConvert555xTo666x_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To8888_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); +template void ColorspaceConvert5551To6665_AVX512(const v512u16 &srcColor, v512u32 &dstLo, v512u32 &dstHi); template v512u32 ColorspaceConvert8888To6665_AVX512(const v512u32 &src); template v512u32 ColorspaceConvert6665To8888_AVX512(const v512u32 &src); template v512u16 ColorspaceConvert8888To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); template v512u16 ColorspaceConvert6665To5551_AVX512(const v512u32 &srcLo, const v512u32 &srcHi); -template v512u32 ColorspaceConvert888XTo8888Opaque_AVX512(const v512u32 &src); +template v512u32 ColorspaceConvert888xTo8888Opaque_AVX512(const v512u32 &src); template v512u16 ColorspaceCopy16_AVX512(const v512u16 &src); template v512u32 ColorspaceCopy32_AVX512(const v512u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_AVX512 : public ColorspaceHandler public: ColorspaceHandler_AVX512() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, 
u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,20 +89,20 @@ class ColorspaceHandler_AVX512 : public ColorspaceHandler size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t 
ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp index e28949998..68ea0cb30 100755 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 
DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,7 +24,7 @@ #include template -FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -65,14 +65,14 @@ FORCEINLINE void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, con } template -FORCEINLINE void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; - ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -113,24 +113,38 @@ FORCEINLINE void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, con } template -FORCEINLINE void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0, 0, 0, 0, 0, 0, 0, 0}; - 
ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF}; - ColorspaceConvert555To8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = {0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F, 0x1F1F}; - ColorspaceConvert555To6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = (v128u16)vec_cmpgt( (v128s16)srcColor, ((v128s16){0xFFFF,0xFFFF, 0xFFFF,0xFFFF, 0xFFFF,0xFFFF, 0xFFFF,0xFFFF}) ); + ColorspaceConvert555aTo8888_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = vec_and( (v128u16)vec_cmpgt( (v128s16)srcColor, ((v128s16){0xFFFF,0xFFFF, 0xFFFF,0xFFFF, 0xFFFF,0xFFFF, 0xFFFF,0xFFFF}) ), ((v128u16){0x1F1F,0x1F1F, 0x1F1F,0x1F1F, 0x1F1F,0x1F1F, 0x1F1F,0x1F1F}) ); + ColorspaceConvert555aTo6665_AltiVec(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -230,7 +244,7 @@ FORCEINLINE v128u16 
ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, co } template -FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src) +FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src) { if (SWAP_RB) { @@ -263,7 +277,41 @@ FORCEINLINE v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src) } template -static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16)) + { + v128u32 dstConvertedLo, dstConvertedHi; + + ColorspaceConvert555xTo8888Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + vec_st(dstConvertedHi, 0, dst+i); + vec_st(dstConvertedLo, 16, dst+i); + } + + return i; +} + +template +size_t ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=sizeof(v128u16)/sizeof(u16)) + { + v128u32 dstConvertedLo, dstConvertedHi; + + ColorspaceConvert555xTo6665Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + vec_st(dstConvertedHi, 0, dst+i); + vec_st(dstConvertedLo, 16, dst+i); + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_AltiVec(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) { size_t i = 0; @@ -271,7 +319,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri { v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + ColorspaceConvert5551To8888_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedLo, 16, dst+i); } @@ -280,7 +328,7 @@ 
static size_t ColorspaceConvertBuffer555To8888Opaque_AltiVec(const u16 *__restri } template -size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer5551To6665_AltiVec(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) { size_t i = 0; @@ -288,7 +336,7 @@ size_t ColorspaceConvertBuffer555To6665Opaque_AltiVec(const u16 *__restrict src, { v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); + ColorspaceConvert5551To6665_AltiVec( vec_ld(0, src+i), dstConvertedLo, dstConvertedHi ); vec_st(dstConvertedHi, 0, dst+i); vec_st(dstConvertedLo, 16, dst+i); } @@ -349,20 +397,20 @@ size_t ColorspaceConvertBuffer6665To5551_AltiVec(const u32 *__restrict src, u16 } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(const u32 *src, u32 *dst, size_t pixCountVec128) { size_t i = 0; for (; i < pixCountVec128; i+=4) { - vec_st( ColorspaceConvert888XTo8888Opaque_AltiVec(vec_ld(0, src+i)), 0, dst+i ); + vec_st( ColorspaceConvert888xTo8888Opaque_AltiVec(vec_ld(0, src+i)), 0, dst+i ); } return i; } template -size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer555xTo888_AltiVec(const u16 *src, u8 *dst, size_t pixCountVec128) { size_t i = 0; v128u16 src_v128u16[2]; @@ -405,7 +453,7 @@ size_t ColorspaceConvertBuffer555XTo888_AltiVec(const u16 *src, u8 *dst, size_t } template -size_t ColorspaceConvertBuffer888XTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo888_AltiVec(const u32 *src, u8 *dst, size_t pixCountVec128) { size_t i = 0; v128u32 src_v128u32[4]; @@ -477,27 +525,51 @@ size_t ColorspaceCopyBuffer32_AltiVec(const u32 *src, u32 *dst, size_t 
pixCountV } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo6665Opaque_AltiVec(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AltiVec(src, dst, pixCount); } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_AltiVec(src, dst, pixCount); } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t 
ColorspaceHandler_AltiVec::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer5551To6665_AltiVec(src, dst, pixCount); } template -size_t ColorspaceHandler_AltiVec::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer5551To6665_AltiVec(src, dst, pixCount); } size_t ColorspaceHandler_AltiVec::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -540,34 +612,34 @@ size_t ColorspaceHandler_AltiVec::ConvertBuffer6665To5551_SwapRB(const u32 *__re return ColorspaceConvertBuffer6665To5551_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888(const u16 *__restrict 
src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AltiVec(src, dst, pixCount); } -size_t ColorspaceHandler_AltiVec::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_AltiVec::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_AltiVec(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_AltiVec(src, dst, pixCount); } size_t ColorspaceHandler_AltiVec::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -580,59 +652,59 @@ size_t ColorspaceHandler_AltiVec::CopyBuffer32_SwapRB(const u32 *src, u32 *dst, return ColorspaceCopyBuffer32_AltiVec(src, dst, pixCount); } -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void 
ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); - -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); - -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void 
ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); - -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); - -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 
&dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); - -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, 
const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 
&srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const 
v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); @@ -646,8 +718,8 @@ template v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo template v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); -template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src); 
+template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src); template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h index 3078a13da..c607bfc01 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_AltiVec.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,17 @@ #warning This header requires PowerPC AltiVec support. #else -template void ColorspaceConvert555To8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_AltiVec(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_AltiVec(const 
v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_AltiVec(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_AltiVec(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_AltiVec(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_AltiVec(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_AltiVec(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_AltiVec(const v128u32 &src); template v128u16 ColorspaceCopy16_AltiVec(const v128u16 &src); template v128u32 ColorspaceCopy32_AltiVec(const v128u32 &src); @@ -46,11 +46,17 @@ class ColorspaceHandler_AltiVec : public ColorspaceHandler public: ColorspaceHandler_AltiVec() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t 
pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -64,14 +70,14 @@ class ColorspaceHandler_AltiVec : public ColorspaceHandler size_t ConvertBuffer6665To5551(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const 
u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp index 81ad657ab..3557c3975 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -28,7 +28,7 @@ #define COLOR32_SWAPRB_NEON(src) vreinterpretq_u32_u8( vqtbl1q_u8(vreinterpretq_u8_u32(src), ((v128u8){2,1,0,3, 6,5,4,7, 10,9,8,11, 14,13,12,15})) ) template -FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -60,7 +60,7 @@ FORCEINLINE void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -90,7 +90,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 
&srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -122,7 +122,7 @@ FORCEINLINE void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -152,17 +152,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = vdupq_n_u16(0xFF00); - ColorspaceConvert555To8888_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = vdupq_n_u16(0x1F00); - ColorspaceConvert555To6665_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_NEON(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128s16 srcAlphaBits16 = vandq_s16( vcgtq_s16(vreinterpretq_u16_s16(srcColor), vdupq_n_s16(0xFFFF)), vdupq_n_s16(0xFF00) ); + 
ColorspaceConvert555aTo8888_NEON(srcColor, vreinterpretq_s16_u16(srcAlphaBits16), dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128s16 srcAlphaBits16 = vandq_s16( vcgtq_s16(vreinterpretq_u16_s16(srcColor), vdupq_n_s16(0xFFFF)), vdupq_n_s16(0x1F00) ); + ColorspaceConvert555aTo6665_NEON(srcColor, vreinterpretq_s16_u16(srcAlphaBits16), dstLo, dstHi); } template @@ -290,7 +304,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const } template -FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src) +FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src) { if (SWAP_RB) { @@ -377,7 +391,41 @@ FORCEINLINE v128u32 ColorspaceApplyIntensity32_NEON(const v128u32 &src, float in } template -static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +static size_t ColorspaceConvertBuffer555xTo8888Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +{ + size_t i = 0; + v128u16 srcVec; + uint32x4x2_t dstVec; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + srcVec = vld1q_u16(src+i); + ColorspaceConvert555xTo8888Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + vst1q_u32_x2(dst+i, dstVec); + } + + return i; +} + +template +size_t ColorspaceConvertBuffer555xTo6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +{ + size_t i = 0; + v128u16 srcVec; + uint32x4x2_t dstVec; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + srcVec = vld1q_u16(src+i); + ColorspaceConvert555xTo6665Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + vst1q_u32_x2(dst+i, dstVec); + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_NEON(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) 
{ size_t i = 0; v128u16 srcVec; @@ -386,7 +434,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) { srcVec = vld1q_u16(src+i); - ColorspaceConvert555To8888Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + ColorspaceConvert5551To8888_NEON(srcVec, dstVec.val[0], dstVec.val[1]); vst1q_u32_x2(dst+i, dstVec); } @@ -394,7 +442,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_NEON(const u16 *__restrict } template -size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer5551To6665_NEON(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) { size_t i = 0; v128u16 srcVec; @@ -403,7 +451,7 @@ size_t ColorspaceConvertBuffer555To6665Opaque_NEON(const u16 *__restrict src, u3 for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) { srcVec = vld1q_u16(src+i); - ColorspaceConvert555To6665Opaque_NEON(srcVec, dstVec.val[0], dstVec.val[1]); + ColorspaceConvert5551To6665_NEON(srcVec, dstVec.val[0], dstVec.val[1]); vst1q_u32_x2(dst+i, dstVec); } @@ -467,7 +515,7 @@ size_t ColorspaceConvertBuffer6665To5551_NEON(const u32 *__restrict src, u16 *__ } template -size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo8888Opaque_NEON(const u32 *src, u32 *dst, size_t pixCountVec128) { size_t i = 0; uint8x16x4_t srcVec_x4; @@ -491,7 +539,7 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_NEON(const u32 *src, u32 *dst, si } template -size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer555xTo888_NEON(const u16 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) { size_t i = 0; uint16x8x2_t srcVec; @@ -529,7 +577,7 @@ size_t ColorspaceConvertBuffer555XTo888_NEON(const u16 *__restrict src, u8 
*__re } template -size_t ColorspaceConvertBuffer888XTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) +size_t ColorspaceConvertBuffer888xTo888_NEON(const u32 *__restrict src, u8 *__restrict dst, size_t pixCountVec128) { size_t i = 0; uint8x16x4_t srcVec_x4; @@ -723,51 +771,99 @@ size_t ColorspaceApplyIntensityToBuffer32_NEON(u32 *dst, size_t pixCountVec128, } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer555xTo8888Opaque_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo6665Opaque_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); } template -size_t 
ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); } template -size_t ColorspaceHandler_NEON::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555To6665Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer5551To8888_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_NEON::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_NEON(src, dst, pixCount); } size_t ColorspaceHandler_NEON::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const @@ -850,64 +946,64 @@ size_t ColorspaceHandler_NEON::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const return 
ColorspaceConvertBuffer6665To5551_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo8888Opaque_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo8888Opaque_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return 
ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer555XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer555xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return 
ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } -size_t ColorspaceHandler_NEON::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_NEON::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { - return ColorspaceConvertBuffer888XTo888_NEON(src, dst, pixCount); + return ColorspaceConvertBuffer888xTo888_NEON(src, dst, pixCount); } size_t ColorspaceHandler_NEON::CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const @@ -970,23 +1066,29 @@ size_t ColorspaceHandler_NEON::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 * return ColorspaceApplyIntensityToBuffer32_NEON(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); + +template void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 
&dstHi); + +template void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void 
ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); @@ -1000,8 +1102,8 @@ template v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, c template v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); -template v128u32 ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src); template v128u16 ColorspaceCopy16_NEON(const v128u16 &src); template v128u16 ColorspaceCopy16_NEON(const v128u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h index 0669fb659..dbbebee41 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_NEON.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2022 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires ARM64 NEON support. 
#else -template void ColorspaceConvert555To8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_NEON(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To8888_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_NEON(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_NEON(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_NEON(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_NEON(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 
ColorspaceConvert888XTo8888Opaque_NEON(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_NEON(const v128u32 &src); template v128u16 ColorspaceCopy16_NEON(const v128u16 &src); template v128u32 ColorspaceCopy32_NEON(const v128u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_NEON : public ColorspaceHandler public: ColorspaceHandler_NEON() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32
*__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,20 +89,20 @@ class ColorspaceHandler_NEON : public ColorspaceHandler size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) 
const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 
*__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const; size_t CopyBuffer16_SwapRB_IsUnaligned(const u16 *src, u16 *dst, size_t pixCount) const; diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp index d933ed4f6..77e6ae22e 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.cpp @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -33,7 +33,7 @@ #endif template -FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -66,7 +66,7 @@ FORCEINLINE void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 
5-bit to 8-bit formula: dstRGB8 = (srcRGB5 << 3) | ((srcRGB5 >> 2) & 0x07) @@ -97,7 +97,7 @@ FORCEINLINE void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -131,7 +131,7 @@ FORCEINLINE void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const } template -FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { // Conversion algorithm: // RGB 5-bit to 6-bit formula: dstRGB6 = (srcRGB5 << 1) | ((srcRGB5 >> 4) & 0x01) @@ -162,17 +162,31 @@ FORCEINLINE void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u } template -FORCEINLINE void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = _mm_set1_epi16(0xFF00); - ColorspaceConvert555To8888_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo8888_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); } template -FORCEINLINE void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +FORCEINLINE void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) { const v128u16 srcAlphaBits16 = _mm_set1_epi16(0x1F00); - ColorspaceConvert555To6665_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); + ColorspaceConvert555aTo6665_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); 
+} + +template +FORCEINLINE void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = _mm_and_si128( _mm_cmpgt_epi16(srcColor, _mm_set1_epi16(0xFFFF)), _mm_set1_epi16(0xFF00) ); + ColorspaceConvert555aTo8888_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); +} + +template +FORCEINLINE void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi) +{ + const v128u16 srcAlphaBits16 = _mm_and_si128( _mm_cmpgt_epi16(srcColor, _mm_set1_epi16(0xFFFF)), _mm_set1_epi16(0x1F00) ); + ColorspaceConvert555aTo6665_SSE2(srcColor, srcAlphaBits16, dstLo, dstHi); } template @@ -315,7 +329,7 @@ FORCEINLINE v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const } template -FORCEINLINE v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src) +FORCEINLINE v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src) { if (SWAP_RB) { @@ -422,7 +436,7 @@ static size_t ColorspaceConvertBuffer555To8888Opaque_SSE2(const u16 *__restrict { v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To8888Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo8888Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -448,7 +462,59 @@ size_t ColorspaceConvertBuffer555To6665Opaque_SSE2(const u16 *__restrict src, u3 { v128u16 src_vec128 = (IS_UNALIGNED) ? 
_mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); v128u32 dstConvertedLo, dstConvertedHi; - ColorspaceConvert555To6665Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + ColorspaceConvert555xTo6665Opaque_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +static size_t ColorspaceConvertBuffer5551To8888_SSE2(const u16 *__restrict src, u32 *__restrict dst, const size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + v128u16 src_vec128 = (IS_UNALIGNED) ? _mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); + v128u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To8888_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); + + if (IS_UNALIGNED) + { + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_storeu_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + else + { + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 0)), dstConvertedLo); + _mm_store_si128((v128u32 *)(dst+i+(sizeof(v128u32)/sizeof(u32) * 1)), dstConvertedHi); + } + } + + return i; +} + +template +size_t ColorspaceConvertBuffer5551To6665_SSE2(const u16 *__restrict src, u32 *__restrict dst, size_t pixCountVec128) +{ + size_t i = 0; + + for (; i < pixCountVec128; i+=(sizeof(v128u16)/sizeof(u16))) + { + v128u16 src_vec128 = (IS_UNALIGNED) ? 
_mm_loadu_si128((v128u16 *)(src+i)) : _mm_load_si128((v128u16 *)(src+i)); + v128u32 dstConvertedLo, dstConvertedHi; + ColorspaceConvert5551To6665_SSE2(src_vec128, dstConvertedLo, dstConvertedHi); if (IS_UNALIGNED) { @@ -554,11 +620,11 @@ size_t ColorspaceConvertBuffer888XTo8888Opaque_SSE2(const u32 *src, u32 *dst, si { if (IS_UNALIGNED) { - _mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2(_mm_loadu_si128((v128u32 *)(src+i))) ); + _mm_storeu_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2(_mm_loadu_si128((v128u32 *)(src+i))) ); } else { - _mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888XTo8888Opaque_SSE2(_mm_load_si128((v128u32 *)(src+i))) ); + _mm_store_si128( (v128u32 *)(dst+i), ColorspaceConvert888xTo8888Opaque_SSE2(_mm_load_si128((v128u32 *)(src+i))) ); } } @@ -937,53 +1003,101 @@ size_t ColorspaceApplyIntensityToBuffer32_SSE2(u32 *dst, size_t pixCountVec128, } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return 
ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To8888Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } template -size_t ColorspaceHandler_SSE2::ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555To6665Opaque_SSE2(src, dst, pixCount); } +template +size_t 
ColorspaceHandler_SSE2::ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To8888_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + +template +size_t ColorspaceHandler_SSE2::ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const +{ + return ColorspaceConvertBuffer5551To6665_SSE2(src, dst, pixCount); +} + size_t ColorspaceHandler_SSE2::ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer8888To6665_SSE2(src, dst, pixCount); @@ -1064,64 +1178,64 @@ size_t 
ColorspaceHandler_SSE2::ConvertBuffer6665To5551_SwapRB_IsUnaligned(const return ColorspaceConvertBuffer6665To5551_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo8888Opaque_SSE2(src, dst, pixCount); } #ifdef ENABLE_SSSE3 -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, 
size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer555XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t ColorspaceHandler_SSE2::ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } -size_t ColorspaceHandler_SSE2::ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const +size_t 
ColorspaceHandler_SSE2::ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const { return ColorspaceConvertBuffer888XTo888_SSSE3(src, dst, pixCount); } @@ -1188,23 +1302,23 @@ size_t ColorspaceHandler_SSE2::ApplyIntensityToBuffer32_SwapRB_IsUnaligned(u32 * return ColorspaceApplyIntensityToBuffer32_SSE2(dst, pixCount, intensity); } -template void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); 
-template void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); template v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); @@ -1218,8 +1332,8 @@ template v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, c template v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); -template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src); template v128u16 ColorspaceCopy16_SSE2(const 
v128u16 &src); template v128u16 ColorspaceCopy16_SSE2(const v128u16 &src); diff --git a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h index 094dc5178..e8cbe085b 100644 --- a/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h +++ b/desmume/src/utils/colorspacehandler/colorspacehandler_SSE2.h @@ -1,5 +1,5 @@ /* - Copyright (C) 2016-2021 DeSmuME team + Copyright (C) 2016-2024 DeSmuME team This file is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -24,17 +24,19 @@ #warning This header requires SSE2 support. #else -template void ColorspaceConvert555To8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo888X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555XTo666X_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); -template void ColorspaceConvert555To6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo8888_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo888x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555aTo6665_SSE2(const v128u16 &srcColor, const v128u16 &srcAlphaBits, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo666x_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert555xTo8888Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void 
ColorspaceConvert555xTo6665Opaque_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To8888_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); +template void ColorspaceConvert5551To6665_SSE2(const v128u16 &srcColor, v128u32 &dstLo, v128u32 &dstHi); template v128u32 ColorspaceConvert8888To6665_SSE2(const v128u32 &src); template v128u32 ColorspaceConvert6665To8888_SSE2(const v128u32 &src); template v128u16 ColorspaceConvert8888To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); template v128u16 ColorspaceConvert6665To5551_SSE2(const v128u32 &srcLo, const v128u32 &srcHi); -template v128u32 ColorspaceConvert888XTo8888Opaque_SSE2(const v128u32 &src); +template v128u32 ColorspaceConvert888xTo8888Opaque_SSE2(const v128u32 &src); template v128u16 ColorspaceCopy16_SSE2(const v128u16 &src); template v128u32 ColorspaceCopy32_SSE2(const v128u32 &src); @@ -47,15 +49,25 @@ class ColorspaceHandler_SSE2 : public ColorspaceHandler public: ColorspaceHandler_SSE2() {}; - template size_t ConvertBuffer555To8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To8888Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo8888Opaque_SwapRB_IsUnaligned(const u16 
*__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; - template size_t ConvertBuffer555To6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer555xTo6665Opaque_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To8888(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To8888_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + + template size_t ConvertBuffer5551To6665(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_SwapRB(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t ConvertBuffer5551To6665_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; + template size_t 
ConvertBuffer5551To6665_SwapRB_IsUnaligned(const u16 *__restrict src, u32 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer8888To6665(const u32 *src, u32 *dst, size_t pixCount) const; size_t ConvertBuffer8888To6665_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; @@ -77,21 +89,21 @@ class ColorspaceHandler_SSE2 : public ColorspaceHandler size_t ConvertBuffer6665To5551_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; size_t ConvertBuffer6665To5551_SwapRB_IsUnaligned(const u32 *__restrict src, u16 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; - size_t ConvertBuffer888XTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; + size_t ConvertBuffer888xTo8888Opaque_SwapRB_IsUnaligned(const u32 *src, u32 *dst, size_t pixCount) const; #ifdef ENABLE_SSSE3 - size_t ConvertBuffer555XTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer555XTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t 
ConvertBuffer555xTo888_SwapRB(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer555xTo888_SwapRB_IsUnaligned(const u16 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; - size_t ConvertBuffer888XTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; + size_t ConvertBuffer888xTo888_SwapRB_IsUnaligned(const u32 *__restrict src, u8 *__restrict dst, size_t pixCount) const; #endif size_t CopyBuffer16_SwapRB(const u16 *src, u16 *dst, size_t pixCount) const;