Reduce dot threshold at which lerp is used instead of slerp.
Add lerp_impl, which is reused by both slerp and lerp and avoids
calculating the dot product again.
bitshifter committed Aug 18, 2024
1 parent 913395d commit 2c7ae1d
Showing 8 changed files with 160 additions and 112 deletions.
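In short: slerp falls back to normalized linear interpolation (nlerp) when the quaternions are nearly aligned, and this commit tightens that cutoff from a dot product of 0.9995 (angles up to roughly acos(0.9995) ≈ 1.8°) to 1.0 - EPSILON (roughly 0.03°), so the fallback now covers only the region where sin(theta) in the slerp weights would be too small to divide by. The following is a minimal scalar sketch of the post-commit structure using a bare [f32; 4] quaternion; it is an illustration of the technique, not glam's actual code, and the helper names are made up.

fn dot(a: [f32; 4], b: [f32; 4]) -> f32 {
    a[0] * b[0] + a[1] * b[1] + a[2] * b[2] + a[3] * b[3]
}

fn normalize(q: [f32; 4]) -> [f32; 4] {
    let len = dot(q, q).sqrt();
    [q[0] / len, q[1] / len, q[2] / len, q[3] / len]
}

// Shared helper: interpolate linearly, then renormalize ("nlerp").
// Both lerp and slerp call this, so the dot product is computed only once.
fn lerp_impl(start: [f32; 4], end: [f32; 4], s: f32) -> [f32; 4] {
    let mut out = [0.0; 4];
    for i in 0..4 {
        out[i] = start[i] + (end[i] - start[i]) * s;
    }
    normalize(out)
}

fn slerp(start: [f32; 4], mut end: [f32; 4], s: f32) -> [f32; 4] {
    let mut d = dot(start, end);
    // Take the short arc: flip `end` when the dot product is negative.
    if d < 0.0 {
        d = -d;
        for c in end.iter_mut() {
            *c = -*c;
        }
    }
    // Old threshold: 0.9995 (nlerp below ~1.8 degrees).
    // New threshold: 1.0 - EPSILON (nlerp only below ~0.03 degrees).
    const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
    if d > DOT_THRESHOLD {
        lerp_impl(start, end, s)
    } else {
        let theta = d.acos();
        let scale1 = ((1.0 - s) * theta).sin() / theta.sin();
        let scale2 = (s * theta).sin() / theta.sin();
        let mut out = [0.0; 4];
        for i in 0..4 {
            out[i] = start[i] * scale1 + end[i] * scale2;
        }
        out
    }
}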
102 changes: 59 additions & 43 deletions codegen/templates/quat.rs.tera
@@ -750,6 +750,51 @@ impl {{ self_t }} {
{{ vec4_t }}::from(self).abs_diff_eq({{ vec4_t }}::from(rhs), max_abs_diff)
}

+ #[inline(always)]
+ #[must_use]
+ fn lerp_impl(self, end: Self, s: {{ scalar_t }}) -> Self {
+ {% if is_scalar %}
+ let interpolated = self + ((end - self) * s);
+ interpolated.normalize()
+ {% elif is_sse2 %}
+ let start = self.0;
+ let end = end.0;
+ unsafe {
+ let interpolated = _mm_add_ps(
+ _mm_mul_ps(_mm_sub_ps(end, start), _mm_set_ps1(s)),
+ start,
+ );
+ {{ self_t }}(interpolated).normalize()
+ }
+ {% elif is_wasm32 %}
+ let start = self.0;
+ let end = end.0;
+ let interpolated = f32x4_add(
+ f32x4_mul(f32x4_sub(end, start), f32x4_splat(s)),
+ start,
+ );
+ {{ self_t }}(interpolated).normalize()
+ {% elif is_coresimd %}
+ let start = self.0;
+ let end = end.0;
+ let interpolated = start + ((end - start) * f32x4::splat(s));
+ {{ self_t }}(interpolated).normalize()
+ {% elif is_neon %}
+ const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
+ let start = self.0;
+ let end = end.0;
+ unsafe {
+ let interpolated = vaddq_f32(
+ vmulq_f32(vsubq_f32(end, start), vld1q_dup_f32(&s)),
+ start,
+ );
+ {{ self_t }}(interpolated).normalize()
+ }
+ {% else %}
+ unimplemented!()
+ {% endif %}
+ }

/// Performs a linear interpolation between `self` and `rhs` based on
/// the value `s`.
///
@@ -767,69 +812,41 @@ impl {{ self_t }} {
glam_assert!(end.is_normalized());

{% if is_scalar %}
- let start = self;
- let dot = start.dot(end);
+ let dot = self.dot(end);
let bias = if dot >= 0.0 { 1.0 } else { -1.0 };
- let interpolated = start.add(end.mul(bias).sub(start).mul(s));
- interpolated.normalize()
+ self.lerp_impl(end * bias, s)
{% elif is_sse2 %}
const NEG_ZERO: __m128 = m128_from_f32x4([-0.0; 4]);
- let start = self.0;
- let end = end.0;
unsafe {
- let dot = dot4_into_m128(start, end);
+ let dot = dot4_into_m128(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = _mm_and_ps(dot, NEG_ZERO);
- let interpolated = _mm_add_ps(
- _mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
- start,
- );
- {{ self_t }}(interpolated).normalize()
+ self.lerp_impl(Self(_mm_xor_ps(end.0, bias)), s)
}
{% elif is_wasm32 %}
const NEG_ZERO: v128 = v128_from_f32x4([-0.0; 4]);
- let start = self.0;
- let end = end.0;
- let dot = dot4_into_v128(start, end);
+ let dot = dot4_into_v128(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = v128_and(dot, NEG_ZERO);
- let interpolated = f32x4_add(
- f32x4_mul(f32x4_sub(v128_xor(end, bias), start), f32x4_splat(s)),
- start,
- );
- {{ self_t }}(interpolated).normalize()
+ self.lerp_impl(Self(v128_xor(end.0, bias)), s)
{% elif is_coresimd %}
const NEG_ZERO: f32x4 = f32x4::from_array([-0.0; 4]);
- let start = self.0;
- let end = end.0;
- let dot = dot4_into_f32x4(start, end);
+ let dot = dot4_into_f32x4(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = f32x4_bitand(dot, NEG_ZERO);
- let interpolated = start + ((f32x4_bitxor(end, bias) - start) * f32x4::splat(s));
- {{ self_t }}(interpolated).normalize()
+ self.lerp_impl(Self(f32x4_bitxor(end.0, bias)), s)
{% elif is_neon %}
const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
- let start = self.0;
- let end = end.0;
unsafe {
- let dot = dot4_into_f32x4(start, end);
+ let dot = dot4_into_f32x4(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = vandq_u32(vreinterpretq_u32_f32(dot), vreinterpretq_u32_f32(NEG_ZERO));
- let interpolated = vaddq_f32(
- vmulq_f32(
- vsubq_f32(
- vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end), bias)),
- start,
- ),
- vld1q_dup_f32(&s),
- ),
- start,
- );
- {{ self_t }}(interpolated).normalize()
+ self.lerp_impl(
+ Self(vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end.0), bias))), s)
}
{% else %}
unimplemented!()
@@ -852,8 +869,6 @@ impl {{ self_t }} {
glam_assert!(self.is_normalized());
glam_assert!(end.is_normalized());

- const DOT_THRESHOLD: {{ scalar_t }} = 0.9995;
-
// Note that a rotation can be represented by two quaternions: `q` and
// `-q`. The slerp path between `q` and `end` will be different from the
// path between `-q` and `end`. One path will take the long way around and
@@ -866,9 +881,10 @@
dot = -dot;
}

+ const DOT_THRESHOLD: {{ scalar_t }} = 1.0 - {{ scalar_t }}::EPSILON;
if dot > DOT_THRESHOLD {
- // assumes lerp returns a normalized quaternion
- self.lerp(end, s)
+ // if above threshold perform linear interpolation to avoid divide by zero
+ self.lerp_impl(end, s)
} else {
let theta = math::acos_approx(dot);
{% if is_scalar %}
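Worth noting for all the SIMD branches above: the short-arc flip is done branch-free. ANDing the dot product with -0.0 isolates its sign bit, and XORing that bit into `end` negates every component exactly when the dot product is negative, which is what the scalar `bias` of ±1.0 achieves with a branch. Here is a single-lane sketch of the bit trick; it is illustrative only, not glam code:

// Negates `end_component` iff `d` is negative, with no branch.
fn flip_by_dot_sign(d: f32, end_component: f32) -> f32 {
    // AND with -0.0 keeps only the sign bit of `d`:
    // 0x8000_0000 when d is negative, 0 otherwise.
    let bias = d.to_bits() & (-0.0_f32).to_bits();
    // XOR toggles the component's sign bit iff `bias` is set.
    f32::from_bits(end_component.to_bits() ^ bias)
}

fn main() {
    assert_eq!(flip_by_dot_sign(0.5, 2.0), 2.0); // d >= 0: unchanged
    assert_eq!(flip_by_dot_sign(-0.5, 2.0), -2.0); // d < 0: negated
    assert_eq!(flip_by_dot_sign(-0.5, -2.0), 2.0);
}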
23 changes: 14 additions & 9 deletions src/f32/coresimd/quat.rs
@@ -606,6 +615,15 @@ impl Quat {
Vec4::from(self).abs_diff_eq(Vec4::from(rhs), max_abs_diff)
}

+ #[inline(always)]
+ #[must_use]
+ fn lerp_impl(self, end: Self, s: f32) -> Self {
+ let start = self.0;
+ let end = end.0;
+ let interpolated = start + ((end - start) * f32x4::splat(s));
+ Quat(interpolated).normalize()
+ }

/// Performs a linear interpolation between `self` and `rhs` based on
/// the value `s`.
///
@@ -623,14 +632,11 @@ impl Quat {
glam_assert!(end.is_normalized());

const NEG_ZERO: f32x4 = f32x4::from_array([-0.0; 4]);
- let start = self.0;
- let end = end.0;
- let dot = dot4_into_f32x4(start, end);
+ let dot = dot4_into_f32x4(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = f32x4_bitand(dot, NEG_ZERO);
- let interpolated = start + ((f32x4_bitxor(end, bias) - start) * f32x4::splat(s));
- Quat(interpolated).normalize()
+ self.lerp_impl(Self(f32x4_bitxor(end.0, bias)), s)
}

/// Performs a spherical linear interpolation between `self` and `end`
@@ -649,8 +655,6 @@ impl Quat {
glam_assert!(self.is_normalized());
glam_assert!(end.is_normalized());

- const DOT_THRESHOLD: f32 = 0.9995;
-
// Note that a rotation can be represented by two quaternions: `q` and
// `-q`. The slerp path between `q` and `end` will be different from the
// path between `-q` and `end`. One path will take the long way around and
@@ -663,9 +667,10 @@ impl Quat {
dot = -dot;
}

+ const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
if dot > DOT_THRESHOLD {
- // assumes lerp returns a normalized quaternion
- self.lerp(end, s)
+ // if above threshold perform linear interpolation to avoid divide by zero
+ self.lerp_impl(end, s)
} else {
let theta = math::acos_approx(dot);

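A note on why every lerp_impl body ends in normalize(): the straight line between two points on the unit 4-sphere passes through the sphere's interior, so the raw interpolant is shorter than unit length. A tiny self-contained check, using plain Rust arithmetic rather than glam's internals:

fn main() {
    // Unnormalized midpoint of the unit 4-vectors (1, 0, 0, 0) and (0, 1, 0, 0).
    let mid = [0.5_f32, 0.5, 0.0, 0.0];
    let len = mid.iter().map(|c| c * c).sum::<f32>().sqrt();
    // Prints ~0.7071, not 1.0, which is why lerp_impl renormalizes.
    println!("midpoint length = {len}");
}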
42 changes: 24 additions & 18 deletions src/f32/neon/quat.rs
@@ -611,6 +611,19 @@ impl Quat {
Vec4::from(self).abs_diff_eq(Vec4::from(rhs), max_abs_diff)
}

+ #[inline(always)]
+ #[must_use]
+ fn lerp_impl(self, end: Self, s: f32) -> Self {
+ const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
+ let start = self.0;
+ let end = end.0;
+ unsafe {
+ let interpolated =
+ vaddq_f32(vmulq_f32(vsubq_f32(end, start), vld1q_dup_f32(&s)), start);
+ Quat(interpolated).normalize()
+ }
+ }

/// Performs a linear interpolation between `self` and `rhs` based on
/// the value `s`.
///
@@ -628,24 +641,18 @@ impl Quat {
glam_assert!(end.is_normalized());

const NEG_ZERO: float32x4_t = f32x4_from_array([-0.0; 4]);
- let start = self.0;
- let end = end.0;
unsafe {
- let dot = dot4_into_f32x4(start, end);
+ let dot = dot4_into_f32x4(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = vandq_u32(vreinterpretq_u32_f32(dot), vreinterpretq_u32_f32(NEG_ZERO));
- let interpolated = vaddq_f32(
- vmulq_f32(
- vsubq_f32(
- vreinterpretq_f32_u32(veorq_u32(vreinterpretq_u32_f32(end), bias)),
- start,
- ),
- vld1q_dup_f32(&s),
- ),
- start,
- );
- Quat(interpolated).normalize()
+ self.lerp_impl(
+ Self(vreinterpretq_f32_u32(veorq_u32(
+ vreinterpretq_u32_f32(end.0),
+ bias,
+ ))),
+ s,
+ )
}
}

@@ -665,8 +672,6 @@ impl Quat {
glam_assert!(self.is_normalized());
glam_assert!(end.is_normalized());

- const DOT_THRESHOLD: f32 = 0.9995;
-
// Note that a rotation can be represented by two quaternions: `q` and
// `-q`. The slerp path between `q` and `end` will be different from the
// path between `-q` and `end`. One path will take the long way around and
@@ -679,9 +684,10 @@ impl Quat {
dot = -dot;
}

+ const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
if dot > DOT_THRESHOLD {
- // assumes lerp returns a normalized quaternion
- self.lerp(end, s)
+ // if above threshold perform linear interpolation to avoid divide by zero
+ self.lerp_impl(end, s)
} else {
let theta = math::acos_approx(dot);

20 changes: 12 additions & 8 deletions src/f32/scalar/quat.rs
@@ -618,6 +618,13 @@ impl Quat {
Vec4::from(self).abs_diff_eq(Vec4::from(rhs), max_abs_diff)
}

+ #[inline(always)]
+ #[must_use]
+ fn lerp_impl(self, end: Self, s: f32) -> Self {
+ let interpolated = self + ((end - self) * s);
+ interpolated.normalize()
+ }

/// Performs a linear interpolation between `self` and `rhs` based on
/// the value `s`.
///
@@ -634,11 +641,9 @@ impl Quat {
glam_assert!(self.is_normalized());
glam_assert!(end.is_normalized());

- let start = self;
- let dot = start.dot(end);
+ let dot = self.dot(end);
let bias = if dot >= 0.0 { 1.0 } else { -1.0 };
- let interpolated = start.add(end.mul(bias).sub(start).mul(s));
- interpolated.normalize()
+ self.lerp_impl(end * bias, s)
}

/// Performs a spherical linear interpolation between `self` and `end`
@@ -657,8 +662,6 @@ impl Quat {
glam_assert!(self.is_normalized());
glam_assert!(end.is_normalized());

- const DOT_THRESHOLD: f32 = 0.9995;
-
// Note that a rotation can be represented by two quaternions: `q` and
// `-q`. The slerp path between `q` and `end` will be different from the
// path between `-q` and `end`. One path will take the long way around and
@@ -671,9 +674,10 @@ impl Quat {
dot = -dot;
}

+ const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
if dot > DOT_THRESHOLD {
- // assumes lerp returns a normalized quaternion
- self.lerp(end, s)
+ // if above threshold perform linear interpolation to avoid divide by zero
+ self.lerp_impl(end, s)
} else {
let theta = math::acos_approx(dot);

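The practical size of the change is easiest to see in degrees. A quick computation (plain Rust, not part of glam):

fn main() {
    let old_threshold = 0.9995_f32;
    let new_threshold = 1.0_f32 - f32::EPSILON;
    // Angle between the quaternions at which each threshold trips.
    println!("old: nlerp below {:.3} deg", old_threshold.acos().to_degrees());
    println!("new: nlerp below {:.3} deg", new_threshold.acos().to_degrees());
}

This prints roughly 1.812 and 0.028, so slerp now follows the spherical path over almost the whole input range, and the linear fallback only covers angles where sin(theta) is too close to zero to divide by safely.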
29 changes: 17 additions & 12 deletions src/f32/sse2/quat.rs
@@ -614,6 +614,18 @@ impl Quat {
Vec4::from(self).abs_diff_eq(Vec4::from(rhs), max_abs_diff)
}

+ #[inline(always)]
+ #[must_use]
+ fn lerp_impl(self, end: Self, s: f32) -> Self {
+ let start = self.0;
+ let end = end.0;
+ unsafe {
+ let interpolated =
+ _mm_add_ps(_mm_mul_ps(_mm_sub_ps(end, start), _mm_set_ps1(s)), start);
+ Quat(interpolated).normalize()
+ }
+ }

/// Performs a linear interpolation between `self` and `rhs` based on
/// the value `s`.
///
@@ -631,18 +643,12 @@ impl Quat {
glam_assert!(end.is_normalized());

const NEG_ZERO: __m128 = m128_from_f32x4([-0.0; 4]);
- let start = self.0;
- let end = end.0;
unsafe {
- let dot = dot4_into_m128(start, end);
+ let dot = dot4_into_m128(self.0, end.0);
// Calculate the bias, if the dot product is positive or zero, there is no bias
// but if it is negative, we want to flip the 'end' rotation XYZW components
let bias = _mm_and_ps(dot, NEG_ZERO);
- let interpolated = _mm_add_ps(
- _mm_mul_ps(_mm_sub_ps(_mm_xor_ps(end, bias), start), _mm_set_ps1(s)),
- start,
- );
- Quat(interpolated).normalize()
+ self.lerp_impl(Self(_mm_xor_ps(end.0, bias)), s)
}
}

@@ -662,8 +668,6 @@ impl Quat {
glam_assert!(self.is_normalized());
glam_assert!(end.is_normalized());

- const DOT_THRESHOLD: f32 = 0.9995;
-
// Note that a rotation can be represented by two quaternions: `q` and
// `-q`. The slerp path between `q` and `end` will be different from the
// path between `-q` and `end`. One path will take the long way around and
@@ -676,9 +680,10 @@ impl Quat {
dot = -dot;
}

+ const DOT_THRESHOLD: f32 = 1.0 - f32::EPSILON;
if dot > DOT_THRESHOLD {
- // assumes lerp returns a normalized quaternion
- self.lerp(end, s)
+ // if above threshold perform linear interpolation to avoid divide by zero
+ self.lerp_impl(end, s)
} else {
let theta = math::acos_approx(dot);

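From the caller's side the API is unchanged; slerp simply stays on the spherical path for a wider range of inputs. A small usage example against glam's public Quat API (the rotation values are arbitrary):

use glam::Quat;

fn main() {
    let from = Quat::from_rotation_y(0.0);
    let to = Quat::from_rotation_y(std::f32::consts::FRAC_PI_2);
    // Ordinary case: well below the threshold, so the spherical path is used.
    let half = from.slerp(to, 0.5);
    assert!(half.is_normalized());
    // Nearly identical rotations: dot > 1.0 - EPSILON, so slerp takes the
    // lerp_impl fallback instead of dividing by a vanishing sin(theta).
    let nearly = Quat::from_rotation_y(1.0e-5);
    assert!(from.slerp(nearly, 0.5).is_normalized());
}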