From f897405a826c0cdad42e987029be65664411d49c Mon Sep 17 00:00:00 2001 From: YgorSouza <43298013+YgorSouza@users.noreply.github.com> Date: Tue, 10 Sep 2024 09:50:56 +0200 Subject: [PATCH] Use precomputed lookup table in Color32::from_rgba_unmultiplied (#5088) Improves performance significantly (about 40 times) according to the benchmarks. * Closes * [x] I have followed the instructions in the PR template --- crates/ecolor/src/color32.rs | 41 +++++++++++++++------------ crates/epaint/benches/benchmark.rs | 45 +++++++++++++++++++++++++++++- 2 files changed, 67 insertions(+), 19 deletions(-) diff --git a/crates/ecolor/src/color32.rs b/crates/ecolor/src/color32.rs index ca257303dae..9025b6ee548 100644 --- a/crates/ecolor/src/color32.rs +++ b/crates/ecolor/src/color32.rs @@ -1,6 +1,4 @@ -use crate::{ - fast_round, gamma_u8_from_linear_f32, linear_f32_from_gamma_u8, linear_f32_from_linear_u8, Rgba, -}; +use crate::{fast_round, linear_f32_from_linear_u8, Rgba}; /// This format is used for space-efficient color representation (32 bits). /// @@ -95,21 +93,28 @@ impl Color32 { /// From `sRGBA` WITHOUT premultiplied alpha. 
#[inline] pub fn from_rgba_unmultiplied(r: u8, g: u8, b: u8, a: u8) -> Self { - if a == 255 { - Self::from_rgb(r, g, b) // common-case optimization - } else if a == 0 { - Self::TRANSPARENT // common-case optimization - } else { - let r_lin = linear_f32_from_gamma_u8(r); - let g_lin = linear_f32_from_gamma_u8(g); - let b_lin = linear_f32_from_gamma_u8(b); - let a_lin = linear_f32_from_linear_u8(a); - - let r = gamma_u8_from_linear_f32(r_lin * a_lin); - let g = gamma_u8_from_linear_f32(g_lin * a_lin); - let b = gamma_u8_from_linear_f32(b_lin * a_lin); - - Self::from_rgba_premultiplied(r, g, b, a) + use std::sync::OnceLock; + match a { + // common-case optimization + 0 => Self::TRANSPARENT, + // common-case optimization + 255 => Self::from_rgb(r, g, b), + a => { + static LOOKUP_TABLE: OnceLock<[u8; 256 * 256]> = OnceLock::new(); + let lut = LOOKUP_TABLE.get_or_init(|| { + use crate::{gamma_u8_from_linear_f32, linear_f32_from_gamma_u8}; + core::array::from_fn(|i| { + let [value, alpha] = (i as u16).to_ne_bytes(); + let value_lin = linear_f32_from_gamma_u8(value); + let alpha_lin = linear_f32_from_linear_u8(alpha); + gamma_u8_from_linear_f32(value_lin * alpha_lin) + }) + }); + + let [r, g, b] = + [r, g, b].map(|value| lut[usize::from(u16::from_ne_bytes([value, a]))]); + Self::from_rgba_premultiplied(r, g, b, a) + } } } diff --git a/crates/epaint/benches/benchmark.rs b/crates/epaint/benches/benchmark.rs index 07a743ae404..e723638b849 100644 --- a/crates/epaint/benches/benchmark.rs +++ b/crates/epaint/benches/benchmark.rs @@ -223,6 +223,46 @@ fn thin_large_line_uv(c: &mut Criterion) { }); } +fn rgba_values() -> [[u8; 4]; 1000] { + core::array::from_fn(|i| [5, 7, 11, 13].map(|m| (i * m) as u8)) +} + +fn from_rgba_unmultiplied_0(c: &mut Criterion) { + c.bench_function("from_rgba_unmultiplied_0", move |b| { + let values = black_box(rgba_values().map(|[r, g, b, _]| [r, g, b, 0])); + b.iter(|| { + for [r, g, b, a] in values { + let color = 
ecolor::Color32::from_rgba_unmultiplied(r, g, b, a); + black_box(color); + } + }); + }); +} + +fn from_rgba_unmultiplied_other(c: &mut Criterion) { + c.bench_function("from_rgba_unmultiplied_other", move |b| { + let values = black_box(rgba_values().map(|[r, g, b, a]| [r, g, b, a.clamp(1, 254)])); + b.iter(|| { + for [r, g, b, a] in values { + let color = ecolor::Color32::from_rgba_unmultiplied(r, g, b, a); + black_box(color); + } + }); + }); +} + +fn from_rgba_unmultiplied_255(c: &mut Criterion) { + c.bench_function("from_rgba_unmultiplied_255", move |b| { + let values = black_box(rgba_values().map(|[r, g, b, _]| [r, g, b, 255])); + b.iter(|| { + for [r, g, b, a] in values { + let color = ecolor::Color32::from_rgba_unmultiplied(r, g, b, a); + black_box(color); + } + }); + }); +} + criterion_group!( benches, single_dashed_lines, @@ -235,6 +275,9 @@ criterion_group!( thick_line_uv, thick_large_line_uv, thin_line_uv, - thin_large_line_uv + thin_large_line_uv, + from_rgba_unmultiplied_0, + from_rgba_unmultiplied_other, + from_rgba_unmultiplied_255, ); criterion_main!(benches);