Skip to content

Commit

Permalink
Initial F64 + Autovectorize test
Browse files Browse the repository at this point in the history
disappointing. Like 10% faster at most. Probably from the branch
  • Loading branch information
Beinsezii committed Jun 3, 2024
1 parent 24d6fb8 commit 37882b9
Show file tree
Hide file tree
Showing 2 changed files with 167 additions and 8 deletions.
7 changes: 6 additions & 1 deletion benches/conversions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ pub fn conversions(c: &mut Criterion) {
} ));

c.bench_function("srgb_eotf", |b| b.iter(|| {
black_box(pixels.clone().iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n)));
const N: usize = 16;
black_box(pixels.clone().chunks_exact_mut(N).for_each(|simd| {
let simd: &mut [f32; N] = simd.try_into().unwrap();
*simd = colcon::srgb_eotf(*simd);
}));
//black_box(pixels.clone().iter_mut().for_each(|n| *n = colcon::srgb_eotf(*n)));
} ));

c.bench_function("srgb_eotf_inverse", |b| b.iter(|| {
Expand Down
168 changes: 161 additions & 7 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,147 @@
//! This crate references CIE Standard Illuminant D65 for functions to/from CIE XYZ
use core::ffi::{c_char, CStr};
//use core::cmp::PartialOrd;
//use core::ops::{Add, Div, Mul, Rem, Sub};

/// Signed power function.
///
/// Raises the magnitude of `n` to `power` and then copies the sign of `n`
/// back onto the result, so negative inputs yield negative outputs.
fn spowf(n: f32, power: f32) -> f32 {
    let magnitude = f32::powf(f32::abs(n), power);
    f32::copysign(magnitude, n)
}

/// Comparison operator passed to `DType::branch` to choose how `self` is
/// compared against the other operand.
///
/// The enum is a plain field-less selector, so it derives the common traits:
/// `Copy` lets it be passed by value repeatedly, `Debug` makes it printable
/// in diagnostics, and `PartialEq`/`Eq` allow it to be compared.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Cmp {
    /// Strictly greater than: `a > b`.
    Gt,
    /// Strictly less than: `a < b`.
    Lt,
    /// Greater than or equal: `a >= b`.
    GtEq,
    /// Less than or equal: `a <= b`.
    LtEq,
}

/// Numeric abstraction that lets one function body run on either a single
/// `f32` or a fixed-size array of `f32` lanes.
///
/// Every operation takes its operands by value, hence the `Copy` bound.
trait DType: Sized + Copy {
    /// Builds a value of this type from one `f32`.
    ///
    /// The scalar impl returns it unchanged; the array impl repeats it in
    /// every lane.
    fn f32(b: f32) -> Self;

    /// `self + b`.
    fn add(self, b: Self) -> Self;

    /// `self - b`.
    fn sub(self, b: Self) -> Self;

    /// `self * b`.
    fn mul(self, b: Self) -> Self;

    /// `self / b`.
    fn div(self, b: Self) -> Self;

    /// `self` raised to the power `b`.
    fn powf(self, b: Self) -> Self;

    /// Compares `self` with `b` using `cmp` and yields the result of `x`
    /// where the comparison holds, or the result of `y` where it does not.
    ///
    /// The scalar impl calls only the selected closure. The array impl calls
    /// both and picks between their results lane by lane.
    fn branch<F, G>(self, b: Self, cmp: Cmp, x: F, y: G) -> Self
    where
        F: FnOnce() -> Self,
        G: FnOnce() -> Self;
}

/// Scalar implementation: every operation maps directly onto the matching
/// built-in `f32` operation.
impl DType for f32 {
    /// Returns the value unchanged.
    fn f32(b: f32) -> Self {
        b
    }

    /// Scalar addition.
    fn add(self, b: Self) -> Self {
        let sum = self + b;
        sum
    }

    /// Scalar subtraction.
    fn sub(self, b: Self) -> Self {
        let difference = self - b;
        difference
    }

    /// Scalar division.
    fn div(self, b: Self) -> Self {
        let quotient = self / b;
        quotient
    }

    /// Scalar multiplication.
    fn mul(self, b: Self) -> Self {
        let product = self * b;
        product
    }

    /// Delegates to the inherent `f32::powf`.
    fn powf(self, b: Self) -> Self {
        f32::powf(self, b)
    }

    /// Evaluates the comparison once and then runs only the closure that was
    /// selected; the other closure is never called.
    fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(
        self,
        b: Self,
        cmp: Cmp,
        x: F,
        y: G,
    ) -> Self {
        let holds = match cmp {
            Cmp::Gt => self > b,
            Cmp::Lt => self < b,
            Cmp::GtEq => self >= b,
            Cmp::LtEq => self <= b,
        };
        if holds {
            return x();
        }
        y()
    }
}

/// Applies `op` to each pair of lanes taken from `lhs` and `rhs`, writing the
/// result back into `lhs`, and returns the updated array.
fn zip_lanes<const N: usize>(
    mut lhs: [f32; N],
    rhs: [f32; N],
    op: impl Fn(f32, f32) -> f32,
) -> [f32; N] {
    // `zip` accepts any `IntoIterator`, so the array is handed over by value
    // directly; no explicit `.into_iter()` call is needed.
    lhs.iter_mut().zip(rhs).for_each(|(l, r)| *l = op(*l, r));
    lhs
}

/// Lane-wise implementation: every operation is applied independently to each
/// of the `N` elements.
impl<const N: usize> DType for [f32; N] {
    /// Repeats `object` in all `N` lanes.
    fn f32(object: f32) -> Self {
        [object; N]
    }

    /// Lane-wise addition.
    fn add(self, b: Self) -> Self {
        zip_lanes(self, b, |l, r| l + r)
    }

    /// Lane-wise subtraction.
    fn sub(self, b: Self) -> Self {
        zip_lanes(self, b, |l, r| l - r)
    }

    /// Lane-wise division.
    fn div(self, b: Self) -> Self {
        zip_lanes(self, b, |l, r| l / r)
    }

    /// Lane-wise multiplication.
    fn mul(self, b: Self) -> Self {
        zip_lanes(self, b, |l, r| l * r)
    }

    /// Lane-wise `f32::powf`.
    fn powf(self, b: Self) -> Self {
        zip_lanes(self, b, |l, r| l.powf(r))
    }

    /// Lane-wise select.
    ///
    /// Unlike the scalar impl, both `x` and `y` are always called (in that
    /// order), because different lanes may need different results. Each
    /// lane then takes its value from `x`'s result where the comparison holds
    /// and from `y`'s result where it does not.
    fn branch<F: FnOnce() -> Self, G: FnOnce() -> Self>(
        mut self,
        b: Self,
        cmp: Cmp,
        x: F,
        y: G,
    ) -> Self {
        let on_true = x();
        let on_false = y();
        for (((lane, rhs), t), f) in self.iter_mut().zip(b).zip(on_true).zip(on_false) {
            let holds = match cmp {
                Cmp::Gt => *lane > rhs,
                Cmp::Lt => *lane < rhs,
                Cmp::GtEq => *lane >= rhs,
                Cmp::LtEq => *lane <= rhs,
            };
            *lane = if holds { t } else { f };
        }
        self
    }
}

// ### CONSTS ### {{{

/// Standard Illuminant D65.
Expand Down Expand Up @@ -162,13 +298,31 @@ fn matmul3(matrix: [[f32; 3]; 3], pixel: [f32; 3]) -> [f32; 3] {
/// sRGB Electro-Optical Transfer Function
///
/// <https://en.wikipedia.org/wiki/SRGB#Computing_the_transfer_function>
#[no_mangle]
pub extern "C" fn srgb_eotf(n: f32) -> f32 {
if n <= SRGBEOTF_CHI {
n / SRGBEOTF_PHI
} else {
((n + SRGBEOTF_ALPHA) / (1.0 + SRGBEOTF_ALPHA)).powf(SRGBEOTF_GAMMA)
}
//#[no_mangle]
//pub fn srgb_eotf<T: DType>(n: T) -> T {
// if n <= SRGBEOTF_CHI.into() {
// n / SRGBEOTF_PHI.into()
// } else {
// ((n + SRGBEOTF_ALPHA.into()) / (SRGBEOTF_ALPHA + 1.0).into()).powf(SRGBEOTF_GAMMA.into())
// }
//}

///
/// Generic over `DType`, so the same body serves a single `f32` and a
/// fixed-size array of `f32` lanes.
///
/// Inputs less than or equal to `SRGBEOTF_CHI` are divided by `SRGBEOTF_PHI`;
/// all other inputs are offset by `SRGBEOTF_ALPHA`, divided by
/// `SRGBEOTF_ALPHA + 1.0`, and raised to the power `SRGBEOTF_GAMMA`.
pub fn srgb_eotf<T: DType>(n: T) -> T {
    // Both segments stay as closures so that `branch` decides which of them
    // actually run (the scalar impl evaluates only the selected one).
    let linear_segment = move || n.div(T::f32(SRGBEOTF_PHI));
    let gamma_segment = move || {
        let offset = n.add(T::f32(SRGBEOTF_ALPHA));
        let normalized = offset.div(T::f32(SRGBEOTF_ALPHA + 1.0));
        normalized.powf(T::f32(SRGBEOTF_GAMMA))
    };

    n.branch(
        T::f32(SRGBEOTF_CHI),
        Cmp::LtEq,
        linear_segment,
        gamma_segment,
    )
}

/// Inverse sRGB Electro-Optical Transfer Function
Expand Down

0 comments on commit 37882b9

Please sign in to comment.