From 071425a0ae8acf0e8555133666d963e5ff2e6eed Mon Sep 17 00:00:00 2001 From: maihd Date: Mon, 1 Apr 2024 17:03:35 +0700 Subject: [PATCH] group common functions in one file --- include/vectormath.h | 4 +- include/vectormath/scalarmath.h | 2 +- include/vectormath/vectormath_clang.h | 640 +---------------- include/vectormath/vectormath_common.h | 489 +++++++++++++ include/vectormath/vectormath_scalar.h | 391 +---------- include/vectormath/vectormath_simd.h | 647 +----------------- include/vectormath/vectormath_simd_utils.h | 286 ++++++++ include/vectormath/vectormath_types.h | 2 +- premake5.lua | 4 +- scripts/vectormath.premake5.lua | 4 + .../app/build.gradle | 2 +- 11 files changed, 790 insertions(+), 1681 deletions(-) create mode 100644 include/vectormath/vectormath_common.h create mode 100644 include/vectormath/vectormath_simd_utils.h diff --git a/include/vectormath.h b/include/vectormath.h index e80dc9d..29f03a3 100644 --- a/include/vectormath.h +++ b/include/vectormath.h @@ -5,9 +5,7 @@ #if VECTORMATH_ENABLE_CLANG_EXT # include "vectormath/vectormath_clang.h" #elif VECTORMATH_SIMD_ENABLE -# if VECTORMATH_NEON_SUPPORT -# include "vectormath/sse_to_neon.h" -# endif +// #if VECTORMATH_SIMD_ENABLE # include "vectormath/vectormath_simd.h" #else # include "vectormath/vectormath_scalar.h" diff --git a/include/vectormath/scalarmath.h b/include/vectormath/scalarmath.h index 539fbb3..8a45de5 100644 --- a/include/vectormath/scalarmath.h +++ b/include/vectormath/scalarmath.h @@ -418,7 +418,7 @@ __forceinline float float_fast_rsqrt(float x) cvt.f = x; cvt.i = 0x5f3759df - (cvt.i >> 1); cvt.f = cvt.f * (1.5f - xhalf * cvt.f * cvt.f); // first approximation - // cvt.f = cvt.f * (1.5f - xhalf * cvt.f * cvt.f); // second approximation + cvt.f = cvt.f * (1.5f - xhalf * cvt.f * cvt.f); // second approximation return cvt.f; } diff --git a/include/vectormath/vectormath_clang.h b/include/vectormath/vectormath_clang.h index 910288c..5221a4d 100644 --- a/include/vectormath/vectormath_clang.h +++ b/include/vectormath/vectormath_clang.h @@ -2,6 +2,8 @@ #include "scalarmath.h" #include "vectormath_types.h" +#include "vectormath_common.h" +#include "vectormath_simd_utils.h" // Make sure clang extensions is enable #if !VECTORMATH_ENABLE_CLANG_EXT @@ -16,264 +18,6 @@ // Helper for extensions #define VECTORMATH_FUNCTIONS_DEFINED -// ------------------------------------------------------------- -// Helper constants -// ------------------------------------------------------------- - -// Small epsilon value -constexpr float SIMD_SLERP_TOL = 0.999f; - -// Common constants used to evaluate simdSinf/cosf4/tanf4 -constexpr float SIMD_SINCOS_CC0 = -0.0013602249f; -constexpr float SIMD_SINCOS_CC1 = 0.0416566950f; -constexpr float SIMD_SINCOS_CC2 = -0.4999990225f; -constexpr float SIMD_SINCOS_SC0 = -0.0001950727f; -constexpr float SIMD_SINCOS_SC1 = 0.0083320758f; -constexpr float SIMD_SINCOS_SC2 = -0.1666665247f; -constexpr float SIMD_SINCOS_KC1 = 1.57079625129f; -constexpr float SIMD_SINCOS_KC2 = 7.54978995489e-8f; - -// Shorthand functions to get the unit vectors as __m128 -__forceinline __m128 m128_unit_1000() { return _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f); } -__forceinline __m128 m128_unit_0100() { return _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f); } -__forceinline __m128 m128_unit_0010() { return _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f); } -__forceinline __m128 m128_unit_0001() { return _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f); } - -// ======================================================== -// Internal helper types and functions -// ======================================================== - -// These have to be macros because _MM_SHUFFLE() requires compile-time constants. -#define m128_ror(vec, i) (((i) % 4) ? (_mm_shuffle_ps(vec, vec, _MM_SHUFFLE((uint32_t)(i + 3) % 4, (uint32_t)(i + 2) % 4, (uint32_t)(i + 1) % 4, (uint32_t)(i + 0) % 4))) : (vec)) -#define m128_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e)) -#define m128_sld(vec, vec2, x) m128_ror(vec, ((x) / 4)) - -__forceinline __m128 m128_mul_add(__m128 a, __m128 b, __m128 c) -{ - return _mm_add_ps(c, _mm_mul_ps(a, b)); -} - -__forceinline __m128 m128_mul_sub(__m128 a, __m128 b, __m128 c) -{ - return _mm_sub_ps(c, _mm_mul_ps(a, b)); -} - -__forceinline __m128 m128_merge_hi(__m128 a, __m128 b) -{ - return _mm_unpacklo_ps(a, b); -} - -__forceinline __m128 m128_merge_lo(__m128 a, __m128 b) -{ - return _mm_unpackhi_ps(a, b); -} - -__forceinline __m128 m128_select(__m128 a, __m128 b, __m128 mask) -{ - return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); -} - -__forceinline __m128 m128_negatef(__m128 x) -{ - return _mm_sub_ps(_mm_setzero_ps(), x); -} - -__forceinline __m128 m128_fabsf(__m128 x) -{ - return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF))); - //return _mm_andnot_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); -} - -__forceinline __m128 m128_acosf(__m128 x) -{ - const __m128 xabs = m128_fabsf(x); - const __m128 select = _mm_cmplt_ps(x, _mm_setzero_ps()); - const __m128 t1 = _mm_sqrt_ps(_mm_sub_ps(_mm_set1_ps(1.0f), xabs)); - - /* Instruction counts can be reduced if the polynomial was - * computed entirely from nested (dependent) fma's. However, - * to reduce the number of pipeline stalls, the polygon is evaluated - * in two halves (hi and lo). - */ - const __m128 xabs2 = _mm_mul_ps(xabs, xabs); - const __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); - - const __m128 hi = m128_mul_add(m128_mul_add(m128_mul_add(_mm_set1_ps(-0.0012624911f), - xabs, _mm_set1_ps( 0.0066700901f)), - xabs, _mm_set1_ps(-0.0170881256f)), - xabs, _mm_set1_ps( 0.0308918810f)); - - const __m128 lo = m128_mul_add(m128_mul_add(m128_mul_add(_mm_set1_ps(-0.0501743046f), - xabs, _mm_set1_ps( 0.0889789874f)), - xabs, _mm_set1_ps(-0.2145988016f)), - xabs, _mm_set1_ps( 1.5707963050f)); - - const __m128 result = m128_mul_add(hi, xabs4, lo); - - // Adjust the result if x is negative. - return m128_select( - _mm_mul_ps(t1, result), // Positive - m128_mul_sub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative - select - ); -} - -__forceinline __m128 m128_cosf(__m128 x) -{ - // Range reduction - // Find the quadrant the angle falls in - // Algo: - // xl = angle * TwoOverPi; - // q = (int32_t)(ceil(abs(xl)) * sign(xl)) - const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); - const __m128i offset_cos = _mm_add_epi32(_mm_set1_epi32(1), offset_sin); - - // Remainder in range [-pi/4..pi/4] - const __m128 qf = _mm_cvtepi32_ps(q); - const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); - - // Compute x^2 and x^3 - const __m128 xl2 = _mm_mul_ps(xl, xl); - const __m128 xl3 = _mm_mul_ps(xl2, xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - const __m128 cx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), - xl2, _mm_set1_ps(1.0f)); - - const __m128 sx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), - xl3, xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - __m128 cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - - __m128 cos = m128_select(cx, sx, cos_mask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - - // Write the result - return m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), cos), cos, cos_mask); -} - -__forceinline __m128 m128_sinf(__m128 x) -{ - // Range reduction - // Find the quadrant the angle falls in - // Algo: - // xl = angle * TwoOverPi; - // q = (int32_t)(ceil(abs(xl)) * sign(xl)) - const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); - - // Remainder in range [-pi/4..pi/4] - const __m128 qf = _mm_cvtepi32_ps(q); - const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); - - // Compute x^2 and x^3 - const __m128 xl2 = _mm_mul_ps(xl, xl); - const __m128 xl3 = _mm_mul_ps(xl2, xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - const __m128 cx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), - xl2, _mm_set1_ps(1.0f)); - - const __m128 sx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), - xl3, xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - __m128 sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - - __m128 sin = m128_select(cx, sx, sin_mask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - - // Write the result - return m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), sin), sin, sin_mask); -} - -__forceinline void m128_sinf_cosf(__m128 x, __m128* out_sin, __m128* out_cos) -{ - // Range reduction - // Find the quadrant the angle falls in - // Algo: - // xl = angle * TwoOverPi; - // q = (int32_t)(ceil(abs(xl)) * sign(xl)) - const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); - const __m128i offset_cos = _mm_add_epi32(_mm_set1_epi32(1), offset_sin); - - // Remainder in range [-pi/4..pi/4] - const __m128 qf = _mm_cvtepi32_ps(q); - const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); - - // Compute x^2 and x^3 - const __m128 xl2 = _mm_mul_ps(xl, xl); - const __m128 xl3 = _mm_mul_ps(xl2, xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - const __m128 cx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), - xl2, _mm_set1_ps(1.0f)); - - const __m128 sx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), - xl3, xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - __m128 sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - __m128 cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - - __m128 sin = m128_select(cx, sx, sin_mask); - __m128 cos = m128_select(cx, sx, cos_mask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - - // Write the result - *out_sin = m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), sin), sin, sin_mask); - *out_cos = m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), cos), cos, cos_mask); -} - // ------------------------------------------------------------- // Constructors // ------------------------------------------------------------- @@ -284,26 +28,6 @@ __forceinline __m128 m128_from_vec3(vec3 v) return _mm_set_ps(0.0f, v.z, v.y, v.x); } -/// Create a new vector 2D -__forceinline vec2 vec2_new(float x, float y) -{ - vec2 result = { x, y }; - return result; -} - -/// Create a new vector 2D, with components have same value -__forceinline vec2 vec2_new1(float s) -{ - vec2 result = { s, s }; - return result; -} - -/// Create a new vector 2D from a vector 3D -__forceinline vec2 vec2_from_vec3(vec3 v) -{ - return v.xy; -} - /// Create a new vector 3D __forceinline vec3 vec3_new(float x, float y, float z) { @@ -432,71 +156,6 @@ __forceinline mat4 mat4_load(const float ptr[]) // Operators-like functions // ------------------------------------------------------------- -__forceinline vec2 vec2_neg(vec2 v) -{ - return vec2_new(-v.x, -v.y); -} - -__forceinline vec2 vec2_add(vec2 a, vec2 b) -{ - return vec2_new(a.x + b.x, a.y + b.y); -} - -__forceinline vec2 vec2_sub(vec2 a, vec2 b) -{ - return vec2_new(a.x - b.x, a.y - b.y); -} - -__forceinline vec2 vec2_mul(vec2 a, vec2 b) -{ - return vec2_new(a.x * b.x, a.y * b.y); -} - -__forceinline vec2 vec2_div(vec2 a, vec2 b) -{ - return vec2_new(a.x / b.x, a.y / b.y); -} - -__forceinline vec2 vec2_add1(vec2 a, float b) -{ - return vec2_add(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_sub1(vec2 a, float b) -{ - return vec2_sub(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_mul1(vec2 a, float b) -{ - return vec2_mul(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_div1(vec2 a, float b) -{ - return vec2_div(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_mul_add(vec2 a, vec2 b, vec2 c) -{ - return vec2_add(c, vec2_mul(a, b)); -} - -__forceinline vec2 vec2_mul_sub(vec2 a, vec2 b, vec2 c) -{ - return vec2_sub(c, vec2_mul(a, b)); -} - -__forceinline bool vec2_equal(vec2 a, vec2 b) -{ - return a.x == b.x && a.y == b.y; -} - -__forceinline bool vec2_not_equal(vec2 a, vec2 b) -{ - return a.x != b.x || a.y != b.y; -} - __forceinline vec3 vec3_neg(vec3 v) { return -v; @@ -704,301 +363,6 @@ __forceinline bool mat4_not_equal(mat4 a, mat4 b) // Functions // ------------------------------------------------------------- -/// Computes sign of 'x' -__forceinline ivec2 vec2_sign(vec2 v) -{ - ivec2 result; - result.x = float_sign(v.x); - result.y = float_sign(v.y); - return result; -} - -/// Computes absolute value -__forceinline vec2 vec2_abs(vec2 v) -{ - return vec2_new(fabsf(v.x), fabsf(v.y)); -} - -/// Computes cosine -__forceinline vec2 vec2_cos(vec2 v) -{ - return vec2_new(cosf(v.x), cosf(v.y)); -} - -/// Computes sine -__forceinline vec2 vec2_sin(vec2 v) -{ - return vec2_new(sinf(v.x), sinf(v.y)); -} - -/// Computes tangent -__forceinline vec2 vec2_tan(vec2 v) -{ - return vec2_new(tanf(v.x), tanf(v.y)); -} - -/// Computes hyperbolic cosine -__forceinline vec2 vec2_cosh(vec2 v) -{ - return vec2_new(coshf(v.x), coshf(v.y)); -} - -/// Computes hyperbolic sine -__forceinline vec2 vec2_sinh(vec2 v) -{ - return vec2_new(sinhf(v.x), sinhf(v.y)); -} - -/// Computes hyperbolic tangent -__forceinline vec2 vec2_tanh(vec2 v) -{ - return vec2_new(tanhf(v.x), tanhf(v.y)); -} - -/// Computes inverse cosine -__forceinline vec2 vec2_acos(vec2 v) -{ - return vec2_new(acosf(v.x), acosf(v.y)); -} - -/// Computes inverse sine -__forceinline vec2 vec2_asin(vec2 v) -{ - return vec2_new(asinf(v.x), asinf(v.y)); -} - -/// Computes inverse tangent -__forceinline vec2 vec2_atan(vec2 v) -{ - return vec2_new(atanf(v.x), atanf(v.y)); -} - -/// Computes inverse tangent with 2 args -__forceinline vec2 vec2_atan2(vec2 a, vec2 b) -{ - return vec2_new(atan2f(a.x, b.x), atan2f(a.y, b.y)); -} - -/// Computes Euler number raised to the power 'x' -__forceinline vec2 vec2_exp(vec2 v) -{ - return vec2_new(expf(v.x), expf(v.y)); -} - -/// Computes 2 raised to the power 'x' -__forceinline vec2 vec2_exp2(vec2 v) -{ - return vec2_new(exp2f(v.x), exp2f(v.y)); -} - -/// Computes the base Euler number logarithm -__forceinline vec2 vec2_log(vec2 v) -{ - return vec2_new(logf(v.x), logf(v.y)); -} - -/// Computes the base 2 logarithm -__forceinline vec2 vec2_log2(vec2 v) -{ - return vec2_new(log2f(v.x), log2f(v.y)); -} - -/// Computes the base 10 logarithm -__forceinline vec2 vec2_log10(vec2 v) -{ - return vec2_new(log10f(v.x), log10f(v.y)); -} - -/// Computes the value of base raised to the power exponent -__forceinline vec2 vec2_pow(vec2 a, vec2 b) -{ - return vec2_new(powf(a.x, b.x), powf(a.y, b.y)); -} - -/// Get the fractal part of floating point -__forceinline vec2 vec2_frac(vec2 v) -{ - return vec2_new(float_frac(v.x), float_frac(v.y)); -} - -/// Computes the floating-point remainder of the division operation x/y -__forceinline vec2 vec2_fmod(vec2 a, vec2 b) -{ - return vec2_new(fmodf(a.x, b.x), fmodf(a.y, b.y)); -} - -/// Computes the smallest integer value not less than 'x' -__forceinline vec2 vec2_ceil(vec2 v) -{ - return vec2_new(ceilf(v.x), ceilf(v.y)); -} - -/// Computes the largest integer value not greater than 'x' -__forceinline vec2 vec2_floor(vec2 v) -{ - return vec2_new(floorf(v.x), floorf(v.y)); -} - -/// Computes the nearest integer value -__forceinline vec2 vec2_round(vec2 v) -{ - return vec2_new(roundf(v.x), roundf(v.y)); -} - -/// Computes the nearest integer not greater in magnitude than 'x' -__forceinline vec2 vec2_trunc(vec2 v) -{ - return vec2_new(truncf(v.x), truncf(v.y)); -} - -/// Get the smaller value -__forceinline vec2 vec2_min(vec2 a, vec2 b) -{ - return vec2_new(float_min(a.x, b.x), float_min(a.y, b.y)); -} - -/// Get the larger value -__forceinline vec2 vec2_max(vec2 a, vec2 b) -{ - return vec2_new(float_max(a.x, b.x), float_max(a.y, b.y)); -} - -/// Clamps the 'x' value to the [min, max]. -__forceinline vec2 vec2_clamp(vec2 v, vec2 min, vec2 max) -{ - return vec2_new(float_clamp(v.x, min.x, max.x), float_clamp(v.y, min.y, max.y)); -} - -/// Clamps the specified value within the range of 0 to 1 -__forceinline vec2 vec2_saturate(vec2 v) -{ - return vec2_new(float_saturate(v.x), float_saturate(v.y)); -} - -/// Compares two values, returning 0 or 1 based on which value is greater. -__forceinline vec2 vec2_step(vec2 a, vec2 b) -{ - return vec2_new(float_step(a.x, b.x), float_step(a.y, b.y)); -} - -/// Performs a linear interpolation. -__forceinline vec2 vec2_lerp(vec2 a, vec2 b, vec2 t) -{ - return vec2_new(float_lerp(a.x, b.x, t.x), float_lerp(a.y, b.y, t.y)); -} - -/// Performs a linear interpolation. -__forceinline vec2 vec2_lerp1(vec2 a, vec2 b, float t) -{ - return vec2_new(float_lerp(a.x, b.x, t), float_lerp(a.y, b.y, t)); -} - -/// Compute a smooth Hermite interpolation -__forceinline vec2 vec2_smoothstep(vec2 a, vec2 b, vec2 t) -{ - return vec2_new(float_smoothstep(a.x, b.x, t.x), float_smoothstep(a.y, b.y, t.y)); -} - -/// Computes square root of 'x'. -__forceinline vec2 vec2_sqrt(vec2 v) -{ - return vec2_new(sqrtf(v.x), sqrtf(v.y)); -} - -/// Computes inverse square root of 'x'. -__forceinline vec2 vec2_rsqrt(vec2 v) -{ - return vec2_new(float_rsqrt(v.x), float_rsqrt(v.y)); -} - -/// Compute dot product of two vectors -__forceinline float vec2_dot(vec2 a, vec2 b) -{ - return a.x * b.x + a.y * b.y; -} - -/// Compute squared length of vector -__forceinline float vec2_lensqr(vec2 v) -{ - return vec2_dot(v, v); -} - -/// Compute length of vector -__forceinline float vec2_length(vec2 v) -{ - return sqrtf(vec2_lensqr(v)); -} - -/// Compute distance from 'a' to b -__forceinline float vec2_distance(vec2 a, vec2 b) -{ - return vec2_length(vec2_sub(a, b)); -} - -/// Compute squared distance from 'a' to b -__forceinline float vec2_distsqr(vec2 a, vec2 b) -{ - return vec2_lensqr(vec2_sub(a, b)); -} - -/// Compute normalized vector -__forceinline vec2 vec2_normalize(vec2 v) -{ - const float lsqr = vec2_lensqr(v); - if (lsqr > 0.0f) - { - const float f = 1.0f / sqrtf(lsqr); - return vec2_new(v.x * f, v.y * f); - } - else - { - return v; - } -} - -/// Compute reflection vector -__forceinline vec2 vec2_reflect(vec2 v, vec2 n) -{ - return vec2_sub(v, vec2_mul1(n, 2.0f * vec2_dot(v, n))); -} - -/// Compute refraction vector -__forceinline vec2 vec2_refract(vec2 v, vec2 n, float eta) -{ - const float k = 1.0f - eta * eta * (1.0f - vec2_dot(v, n) * vec2_dot(v, n)); - return k < 0.0f - ? vec2_new1(0.0f) - : vec2_sub(vec2_mul1(v, eta), vec2_mul1(v, (eta * vec2_dot(v, n) + sqrtf(k)))); -} - -/// Compute faceforward vector -__forceinline vec2 vec2_faceforward(vec2 n, vec2 i, vec2 nref) -{ - return vec2_dot(i, nref) < 0.0f ? n : vec2_neg(n); -} - -/// Compute angle vector -__forceinline float vec2_angle(vec2 v) -{ - return atan2f(v.y, v.x); -} - -/// Compute angle vector in degrees -__forceinline float vec2_angle_deg(vec2 v) -{ - return float_rad2deg(vec2_angle(v)); -} - -/// Create new vector with angle and length -__forceinline vec2 vec2_from_angle(float angle, float length) { - return vec2_new(cosf(angle) * length, sinf(angle) * length); -} - -/// Create new vector with angle in degrees, and length -__forceinline vec2 vec2_from_angle_deg(float angle, float length) { - return vec2_from_angle(float_rad2deg(angle), length); -} - /// Computes sign of 'x' __forceinline ivec3 vec3_sign(vec3 v) { diff --git a/include/vectormath/vectormath_common.h b/include/vectormath/vectormath_common.h new file mode 100644 index 0000000..0b6fd3f --- /dev/null +++ b/include/vectormath/vectormath_common.h @@ -0,0 +1,489 @@ +#pragma once + +#include "scalarmath.h" +#include "vectormath_types.h" + +// ------------------------------------------------------------- +// Constructors +// ------------------------------------------------------------- + + +/// Create a new vector 2D +__forceinline vec2 vec2_new(float x, float y) +{ + vec2 result; + result.x = x; + result.y = y; + return result; +} + + +/// Create a new vector 2D with 2 components have same value +__forceinline vec2 vec2_new1(float s) +{ + vec2 result; + result.x = s; + result.y = s; + return result; +} + + +/// Create a new vector 2D from a vector 3D +__forceinline vec2 vec2_from_vec3(vec3 v) +{ + return v.xy; +} + +// ------------------------------------------------------------- +// Operators-like functions +// ------------------------------------------------------------- + + +/// Compute negate value of v +__forceinline vec2 vec2_neg(vec2 v) +{ + return vec2_new(-v.x, -v.y); +} + + +/// Add two vectors +__forceinline vec2 vec2_add(vec2 a, vec2 b) +{ + return vec2_new(a.x + b.x, a.y + b.y); +} + +/// Subtract two vectors +__forceinline vec2 vec2_sub(vec2 a, vec2 b) +{ + return vec2_new(a.x - b.x, a.y - b.y); +} + + +/// Multiply two vectors +__forceinline vec2 vec2_mul(vec2 a, vec2 b) +{ + return vec2_new(a.x * b.x, a.y * b.y); +} + + +/// Divide two vectors +__forceinline vec2 vec2_div(vec2 a, vec2 b) +{ + return vec2_new(a.x / b.x, a.y / b.y); +} + + +/// Add vector with scalar +__forceinline vec2 vec2_add1(vec2 a, float b) +{ + return vec2_add(a, vec2_new1(b)); +} + + +/// Subtract vector with scalar +__forceinline vec2 vec2_sub1(vec2 a, float b) +{ + return vec2_sub(a, vec2_new1(b)); +} + + +/// Multiply vector with scalar +__forceinline vec2 vec2_mul1(vec2 a, float b) +{ + return vec2_mul(a, vec2_new1(b)); +} + + +/// Divide vector with scalar +__forceinline vec2 vec2_div1(vec2 a, float b) +{ + return vec2_div(a, vec2_new1(b)); +} + + +/// Compute Mul-Add algorithm: +/// mul_add(a, b, c) = c - a * b +__forceinline vec2 vec2_mul_add(vec2 a, vec2 b, vec2 c) +{ + return vec2_add(c, vec2_mul(a, b)); +} + + +/// Compute Mul-Sub algorithm: +/// mul_sub(a, b, c) = c - a * b +__forceinline vec2 vec2_mul_sub(vec2 a, vec2 b, vec2 c) +{ + return vec2_sub(c, vec2_mul(a, b)); +} + + +/// Check if two vectors are equal +__forceinline bool vec2_equal(vec2 a, vec2 b) +{ + return a.x == b.x && a.y == b.y; +} + + +/// Check if two vectors are not equal (faster than !vec2_equal) +__forceinline bool vec2_not_equal(vec2 a, vec2 b) +{ + return a.x != b.x || a.y != b.y; +} + + +/// Check if two vectors are closely equal +__forceinline bool vec2_isclose(vec2 a, vec2 b) +{ + return float_isclose(a.x, b.x) + && float_isclose(a.y, b.y); +} + + +// ------------------------------------------------------------- +// Functions +// ------------------------------------------------------------- + + +/// Computes sign of 'x' +__forceinline ivec2 vec2_sign(vec2 v) +{ + ivec2 result = { float_sign(v.x), float_sign(v.y) }; + return result; +} + + +/// Computes absolute value +__forceinline vec2 vec2_abs(vec2 v) +{ + return vec2_new(fabsf(v.x), fabsf(v.y)); +} + + +/// Computes cosine +__forceinline vec2 vec2_cos(vec2 v) +{ + return vec2_new(cosf(v.x), cosf(v.y)); +} + + +/// Computes sine +__forceinline vec2 vec2_sin(vec2 v) +{ + return vec2_new(sinf(v.x), sinf(v.y)); +} + + +/// Computes tangent +__forceinline vec2 vec2_tan(vec2 v) +{ + return vec2_new(tanf(v.x), tanf(v.y)); +} + + +/// Computes hyperbolic cosine +__forceinline vec2 vec2_cosh(vec2 v) +{ + return vec2_new(coshf(v.x), coshf(v.y)); +} + + +/// Computes hyperbolic sine +__forceinline vec2 vec2_sinh(vec2 v) +{ + return vec2_new(sinhf(v.x), sinhf(v.y)); +} + + +/// Computes hyperbolic tangent +__forceinline vec2 vec2_tanh(vec2 v) +{ + return vec2_new(tanhf(v.x), tanhf(v.y)); +} + + +/// Computes inverse cosine +__forceinline vec2 vec2_acos(vec2 v) +{ + return vec2_new(acosf(v.x), acosf(v.y)); +} + + +/// Computes inverse sine +__forceinline vec2 vec2_asin(vec2 v) +{ + return vec2_new(asinf(v.x), asinf(v.y)); +} + + +/// Computes inverse tangent +__forceinline vec2 vec2_atan(vec2 v) +{ + return vec2_new(atanf(v.x), atanf(v.y)); +} + + +/// Computes inverse tangent with 2 args +__forceinline vec2 vec2_atan2(vec2 a, vec2 b) +{ + return vec2_new(atan2f(a.x, b.x), atan2f(a.y, b.y)); +} + + +/// Computes Euler number raised to the power 'x' +__forceinline vec2 vec2_exp(vec2 v) +{ + return vec2_new(expf(v.x), expf(v.y)); +} + + +/// Computes 2 raised to the power 'x' +__forceinline vec2 vec2_exp2(vec2 v) +{ + return vec2_new(exp2f(v.x), exp2f(v.y)); +} + + +/// Computes the base Euler number logarithm +__forceinline vec2 vec2_log(vec2 v) +{ + return vec2_new(logf(v.x), logf(v.y)); +} + + +/// Computes the base 2 logarithm +__forceinline vec2 vec2_log2(vec2 v) +{ + return vec2_new(log2f(v.x), log2f(v.y)); +} + + +/// Computes the base 10 logarithm +__forceinline vec2 vec2_log10(vec2 v) +{ + return vec2_new(log10f(v.x), log10f(v.y)); +} + + +/// Computes the value of base raised to the power exponent +__forceinline vec2 vec2_pow(vec2 a, vec2 b) +{ + return vec2_new(powf(a.x, b.x), powf(a.y, b.y)); +} + + +/// Get the fractal part of floating point +__forceinline vec2 vec2_frac(vec2 v) +{ + return vec2_new(float_frac(v.x), float_frac(v.y)); +} + + +/// Computes the floating-point remainder of the division operation x/y +__forceinline vec2 vec2_fmod(vec2 a, vec2 b) +{ + return vec2_new(fmodf(a.x, b.x), fmodf(a.y, b.y)); +} + + +/// Computes the smallest integer value not less than 'x' +__forceinline vec2 vec2_ceil(vec2 v) +{ + return vec2_new(ceilf(v.x), ceilf(v.y)); +} + + +/// Computes the largest integer value not greater than 'x' +__forceinline vec2 vec2_floor(vec2 v) +{ + return vec2_new(floorf(v.x), floorf(v.y)); +} + + +/// Computes the nearest integer value +__forceinline vec2 vec2_round(vec2 v) +{ + return vec2_new(roundf(v.x), roundf(v.y)); +} + + +/// Computes the nearest integer not greater in magnitude than 'x' +__forceinline vec2 vec2_trunc(vec2 v) +{ + return vec2_new(truncf(v.x), truncf(v.y)); +} + + +/// Get the smaller value +__forceinline vec2 vec2_min(vec2 a, vec2 b) +{ + return vec2_new(float_min(a.x, b.x), float_min(a.y, b.y)); +} + + +/// Get the larger value +__forceinline vec2 vec2_max(vec2 a, vec2 b) +{ + return vec2_new(float_max(a.x, b.x), float_max(a.y, b.y)); +} + + +/// Clamps the 'x' value to the [min, max]. +__forceinline vec2 vec2_clamp(vec2 v, vec2 min, vec2 max) +{ + return vec2_new(float_clamp(v.x, min.x, max.x), float_clamp(v.y, min.y, max.y)); +} + + +/// Clamps the specified value within the range of 0 to 1 +__forceinline vec2 vec2_saturate(vec2 v) +{ + return vec2_new(float_saturate(v.x), float_saturate(v.y)); +} + + +/// Compares two values, returning 0 or 1 based on which value is greater. +__forceinline vec2 vec2_step(vec2 a, vec2 b) +{ + return vec2_new(float_step(a.x, b.x), float_step(a.y, b.y)); +} + + +/// Performs a linear interpolation. +__forceinline vec2 vec2_lerp(vec2 a, vec2 b, vec2 t) +{ + return vec2_new(float_lerp(a.x, b.x, t.x), float_lerp(a.y, b.y, t.y)); +} + + +/// Performs a linear interpolation. +__forceinline vec2 vec2_lerp1(vec2 a, vec2 b, float t) +{ + return vec2_new(float_lerp(a.x, b.x, t), float_lerp(a.y, b.y, t)); +} + + +/// Compute a smooth Hermite interpolation +__forceinline vec2 vec2_smoothstep(vec2 a, vec2 b, vec2 t) +{ + return vec2_new(float_smoothstep(a.x, b.x, t.x), float_smoothstep(a.y, b.y, t.y)); +} + + +/// Computes square root of 'x'. +__forceinline vec2 vec2_sqrt(vec2 v) +{ + return vec2_new(sqrtf(v.x), sqrtf(v.y)); +} + + +/// Computes inverse square root of 'x'. +__forceinline vec2 vec2_rsqrt(vec2 v) +{ + return vec2_new(float_rsqrt(v.x), float_rsqrt(v.y)); +} + + +/// Compute dot product of two vectors +__forceinline float vec2_dot(vec2 a, vec2 b) +{ + return a.x * b.x + a.y * b.y; +} + + +/// Compute squared length of vector +__forceinline float vec2_lensqr(vec2 v) +{ + return vec2_dot(v, v); +} + + +/// Compute length of vector +__forceinline float vec2_length(vec2 v) +{ + return sqrtf(vec2_lensqr(v)); +} + + +/// Compute distance from 'a' to b +__forceinline float vec2_distance(vec2 a, vec2 b) +{ + return vec2_length(vec2_sub(a, b)); +} + + +/// Compute squared distance from 'a' to b +__forceinline float vec2_distsqr(vec2 a, vec2 b) +{ + return vec2_lensqr(vec2_sub(a, b)); +} + + +/// Compute normalized vector +__forceinline vec2 vec2_normalize(vec2 v) +{ + const float lsqr = vec2_lensqr(v); + if (lsqr > 0.0f) + { + const float f = 1.0f / sqrtf(lsqr); + return vec2_new(v.x * f, v.y * f); + } + else + { + return v; + } +} + + +/// Compute reflection vector +__forceinline vec2 vec2_reflect(vec2 v, vec2 n) +{ + return vec2_sub(v, vec2_mul1(n, 2.0f * vec2_dot(v, n))); +} + + +/// Compute refraction vector +__forceinline vec2 vec2_refract(vec2 v, vec2 n, float eta) +{ + const float k = 1.0f - eta * eta * (1.0f - vec2_dot(v, n) * vec2_dot(v, n)); + return k < 0.0f + ? vec2_new1(0.0f) + : vec2_sub(vec2_mul1(v, eta), vec2_mul1(v, (eta * vec2_dot(v, n) + sqrtf(k)))); +} + + +/// Compute faceforward vector +__forceinline vec2 vec2_faceforward(vec2 n, vec2 i, vec2 nref) +{ + return vec2_dot(i, nref) < 0.0f ? n : vec2_neg(n); +} + + +/// Compute angle vector +__forceinline float vec2_angle(vec2 v) +{ + return atan2f(v.y, v.x); +} + + +/// Compute angle vector in degrees +__forceinline float vec2_angle_deg(vec2 v) +{ + return float_rad2deg(vec2_angle(v)); +} + + +/// Create new vector with angle and length +__forceinline vec2 vec2_from_angle(float angle, float length) +{ + return vec2_new(cosf(angle) * length, sinf(angle) * length); +} + + +/// Create new vector with angle in degrees, and length +__forceinline vec2 vec2_from_angle_deg(float angle, float length) +{ + return vec2_from_angle(float_rad2deg(angle), length); +} + +//! LEAVE AN EMPTY LINE HERE, REQUIRE BY GCC/G++ + diff --git a/include/vectormath/vectormath_scalar.h b/include/vectormath/vectormath_scalar.h index e6c804a..d45b1d8 100644 --- a/include/vectormath/vectormath_scalar.h +++ b/include/vectormath/vectormath_scalar.h @@ -2,6 +2,7 @@ #include "scalarmath.h" #include "vectormath_types.h" +#include "vectormath_common.h" // Should not use both scalar & m128 versions of vector_math together #if defined(VECTORMATH_FUNCTIONS_DEFINED) @@ -15,30 +16,6 @@ // Constructors // ------------------------------------------------------------- -/// Create a new vector 2D -__forceinline vec2 vec2_new(float x, float y) -{ - vec2 result; - result.x = x; - result.y = y; - return result; -} - -/// Create a new vector 2D with 2 components have same value -__forceinline vec2 vec2_new1(float s) -{ - vec2 result; - result.x = s; - result.y = s; - return result; -} - -/// Create a new vector 2D from a vector 3D -__forceinline vec2 vec2_from_vec3(vec3 v) -{ - return v.xy; -} - /// Create a new vector 3D __forceinline vec3 vec3_new(float x, float y, float z) { @@ -182,77 +159,6 @@ __forceinline mat4 mat4_load(const float ptr[]) // Operators-like functions // ------------------------------------------------------------- -__forceinline vec2 vec2_neg(vec2 v) -{ - return vec2_new(-v.x, -v.y); -} - -__forceinline vec2 vec2_add(vec2 a, vec2 b) -{ - return vec2_new(a.x + b.x, a.y + b.y); -} - -__forceinline vec2 vec2_sub(vec2 a, vec2 b) -{ - return vec2_new(a.x - b.x, a.y - b.y); -} - -__forceinline vec2 vec2_mul(vec2 a, vec2 b) -{ - return vec2_new(a.x * b.x, a.y * b.y); -} - -__forceinline vec2 vec2_div(vec2 a, vec2 b) -{ - return vec2_new(a.x / b.x, a.y / b.y); -} - -__forceinline vec2 vec2_add1(vec2 a, float b) -{ - return vec2_add(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_sub1(vec2 a, float b) -{ - return vec2_sub(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_mul1(vec2 a, float b) -{ - return vec2_mul(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_div1(vec2 a, float b) -{ - return vec2_div(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_mul_add(vec2 a, vec2 b, vec2 c) -{ - return vec2_add(c, vec2_mul(a, b)); -} - -__forceinline vec2 vec2_mul_sub(vec2 a, vec2 b, vec2 c) -{ - return vec2_sub(c, vec2_mul(a, b)); -} - -__forceinline bool vec2_equal(vec2 a, vec2 b) -{ - return a.x == b.x && a.y == b.y; -} - -__forceinline bool vec2_not_equal(vec2 a, vec2 b) -{ - return a.x != b.x || a.y != b.y; -} - -__forceinline bool vec2_isclose(vec2 a, vec2 b) -{ - return float_isclose(a.x, b.x) - && float_isclose(a.y, b.y); -} - __forceinline vec3 vec3_neg(vec3 v) { return vec3_new(-v.x, -v.y, -v.z); @@ -442,301 +348,6 @@ __forceinline bool mat4_not_equal(mat4 a, mat4 b) // Functions // ------------------------------------------------------------- -/// Computes sign of 'x' -__forceinline ivec2 vec2_sign(vec2 v) -{ - ivec2 result = { float_sign(v.x), float_sign(v.y) }; - return result; -} - -/// Computes absolute value -__forceinline vec2 vec2_abs(vec2 v) -{ - return vec2_new(fabsf(v.x), fabsf(v.y)); -} - -/// Computes cosine -__forceinline vec2 vec2_cos(vec2 v) -{ - return vec2_new(cosf(v.x), cosf(v.y)); -} - -/// Computes sine -__forceinline vec2 vec2_sin(vec2 v) -{ - return vec2_new(sinf(v.x), sinf(v.y)); -} - -/// Computes tangent -__forceinline vec2 vec2_tan(vec2 v) -{ - return vec2_new(tanf(v.x), tanf(v.y)); -} - -/// Computes hyperbolic cosine -__forceinline vec2 vec2_cosh(vec2 v) -{ - return vec2_new(coshf(v.x), coshf(v.y)); -} - -/// Computes hyperbolic sine -__forceinline vec2 vec2_sinh(vec2 v) -{ - return vec2_new(sinhf(v.x), sinhf(v.y)); -} - -/// Computes hyperbolic tangent -__forceinline vec2 vec2_tanh(vec2 v) -{ - return vec2_new(tanhf(v.x), tanhf(v.y)); -} - -/// Computes inverse cosine -__forceinline vec2 vec2_acos(vec2 v) -{ - return vec2_new(acosf(v.x), acosf(v.y)); -} - -/// Computes inverse sine -__forceinline vec2 vec2_asin(vec2 v) -{ - return vec2_new(asinf(v.x), asinf(v.y)); -} - -/// Computes inverse tangent -__forceinline vec2 vec2_atan(vec2 v) -{ - return vec2_new(atanf(v.x), atanf(v.y)); -} - -/// Computes inverse tangent with 2 args -__forceinline vec2 vec2_atan2(vec2 a, vec2 b) -{ - return vec2_new(atan2f(a.x, b.x), atan2f(a.y, b.y)); -} - -/// Computes Euler number raised to the power 'x' -__forceinline vec2 vec2_exp(vec2 v) -{ - return vec2_new(expf(v.x), expf(v.y)); -} - -/// Computes 2 raised to the power 'x' -__forceinline vec2 vec2_exp2(vec2 v) -{ - return vec2_new(exp2f(v.x), exp2f(v.y)); -} - -/// Computes the base Euler number logarithm -__forceinline vec2 vec2_log(vec2 v) -{ - return vec2_new(logf(v.x), logf(v.y)); -} - -/// Computes the base 2 logarithm -__forceinline vec2 vec2_log2(vec2 v) -{ - return vec2_new(log2f(v.x), log2f(v.y)); -} - -/// Computes the base 10 logarithm -__forceinline vec2 vec2_log10(vec2 v) -{ - return vec2_new(log10f(v.x), log10f(v.y)); -} - -/// Computes the value of base raised to the power exponent -__forceinline vec2 vec2_pow(vec2 a, vec2 b) -{ - return vec2_new(powf(a.x, b.x), powf(a.y, b.y)); -} - -/// Get the fractal part of floating point -__forceinline vec2 vec2_frac(vec2 v) -{ - return vec2_new(float_frac(v.x), float_frac(v.y)); -} - -/// Computes the floating-point remainder of the division operation x/y -__forceinline vec2 vec2_fmod(vec2 a, vec2 b) -{ - return vec2_new(fmodf(a.x, b.x), fmodf(a.y, b.y)); -} - -/// Computes the smallest integer value not less than 'x' -__forceinline vec2 vec2_ceil(vec2 v) -{ - return vec2_new(ceilf(v.x), ceilf(v.y)); -} - -/// Computes the largest integer value not greater than 'x' -__forceinline vec2 vec2_floor(vec2 v) -{ - return vec2_new(floorf(v.x), floorf(v.y)); -} - -/// Computes the nearest integer value -__forceinline vec2 vec2_round(vec2 v) -{ - return vec2_new(roundf(v.x), roundf(v.y)); -} - -/// Computes the nearest integer not greater in magnitude than 'x' -__forceinline vec2 vec2_trunc(vec2 v) -{ - return vec2_new(truncf(v.x), truncf(v.y)); -} - -/// Get the smaller value -__forceinline vec2 vec2_min(vec2 a, vec2 b) -{ - return vec2_new(float_min(a.x, b.x), float_min(a.y, b.y)); -} - -/// Get the larger value -__forceinline vec2 vec2_max(vec2 a, vec2 b) -{ - return vec2_new(float_max(a.x, b.x), float_max(a.y, b.y)); -} - -/// Clamps the 'x' value to the [min, max]. -__forceinline vec2 vec2_clamp(vec2 v, vec2 min, vec2 max) -{ - return vec2_new(float_clamp(v.x, min.x, max.x), float_clamp(v.y, min.y, max.y)); -} - -/// Clamps the specified value within the range of 0 to 1 -__forceinline vec2 vec2_saturate(vec2 v) -{ - return vec2_new(float_saturate(v.x), float_saturate(v.y)); -} - -/// Compares two values, returning 0 or 1 based on which value is greater. -__forceinline vec2 vec2_step(vec2 a, vec2 b) -{ - return vec2_new(float_step(a.x, b.x), float_step(a.y, b.y)); -} - -/// Performs a linear interpolation. -__forceinline vec2 vec2_lerp(vec2 a, vec2 b, vec2 t) -{ - return vec2_new(float_lerp(a.x, b.x, t.x), float_lerp(a.y, b.y, t.y)); -} - -/// Performs a linear interpolation. -__forceinline vec2 vec2_lerp1(vec2 a, vec2 b, float t) -{ - return vec2_new(float_lerp(a.x, b.x, t), float_lerp(a.y, b.y, t)); -} - -/// Compute a smooth Hermite interpolation -__forceinline vec2 vec2_smoothstep(vec2 a, vec2 b, vec2 t) -{ - return vec2_new(float_smoothstep(a.x, b.x, t.x), float_smoothstep(a.y, b.y, t.y)); -} - -/// Computes square root of 'x'. -__forceinline vec2 vec2_sqrt(vec2 v) -{ - return vec2_new(sqrtf(v.x), sqrtf(v.y)); -} - -/// Computes inverse square root of 'x'. -__forceinline vec2 vec2_rsqrt(vec2 v) -{ - return vec2_new(float_rsqrt(v.x), float_rsqrt(v.y)); -} - -/// Compute dot product of two vectors -__forceinline float vec2_dot(vec2 a, vec2 b) -{ - return a.x * b.x + a.y * b.y; -} - -/// Compute squared length of vector -__forceinline float vec2_lensqr(vec2 v) -{ - return vec2_dot(v, v); -} - -/// Compute length of vector -__forceinline float vec2_length(vec2 v) -{ - return sqrtf(vec2_lensqr(v)); -} - -/// Compute distance from 'a' to b -__forceinline float vec2_distance(vec2 a, vec2 b) -{ - return vec2_length(vec2_sub(a, b)); -} - -/// Compute squared distance from 'a' to b -__forceinline float vec2_distsqr(vec2 a, vec2 b) -{ - return vec2_lensqr(vec2_sub(a, b)); -} - -/// Compute normalized vector -__forceinline vec2 vec2_normalize(vec2 v) -{ - const float lsqr = vec2_lensqr(v); - if (lsqr > 0.0f) - { - const float f = 1.0f / sqrtf(lsqr); - return vec2_new(v.x * f, v.y * f); - } - else - { - return v; - } -} - -/// Compute reflection vector -__forceinline vec2 vec2_reflect(vec2 v, vec2 n) -{ - return vec2_sub(v, vec2_mul1(n, 2.0f * vec2_dot(v, n))); -} - -/// Compute refraction vector -__forceinline vec2 vec2_refract(vec2 v, vec2 n, float eta) -{ - const float k = 1.0f - eta * eta * (1.0f - vec2_dot(v, n) * vec2_dot(v, n)); - return k < 0.0f - ? vec2_new1(0.0f) - : vec2_sub(vec2_mul1(v, eta), vec2_mul1(v, (eta * vec2_dot(v, n) + sqrtf(k)))); -} - -/// Compute faceforward vector -__forceinline vec2 vec2_faceforward(vec2 n, vec2 i, vec2 nref) -{ - return vec2_dot(i, nref) < 0.0f ? n : vec2_neg(n); -} - -/// Compute angle vector -__forceinline float vec2_angle(vec2 v) -{ - return atan2f(v.y, v.x); -} - -/// Compute angle vector in degrees -__forceinline float vec2_angle_deg(vec2 v) -{ - return float_rad2deg(vec2_angle(v)); -} - -/// Create new vector with angle and length -__forceinline vec2 vec2_from_angle(float angle, float length) -{ - return vec2_new(cosf(angle) * length, sinf(angle) * length); -} - -/// Create new vector with angle in degrees, and length -__forceinline vec2 vec2_from_angle_deg(float angle, float length) -{ - return vec2_from_angle(float_rad2deg(angle), length); -} - /// Computes sign of 'x' __forceinline ivec3 vec3_sign(vec3 v) { diff --git a/include/vectormath/vectormath_simd.h b/include/vectormath/vectormath_simd.h index 8ceaa13..fed9d54 100644 --- a/include/vectormath/vectormath_simd.h +++ b/include/vectormath/vectormath_simd.h @@ -2,6 +2,8 @@ #include "scalarmath.h" #include "vectormath_types.h" +#include "vectormath_common.h" +#include "vectormath_simd_utils.h" // Make sure m128 is enable #if !VECTORMATH_SIMD_ENABLE @@ -16,288 +18,10 @@ // Helper for extensions #define VECTORMATH_FUNCTIONS_DEFINED -// ------------------------------------------------------------- -// Helper constants -// ------------------------------------------------------------- - -// Small epsilon value -constexpr float SIMD_SLERP_TOL = 0.999f; - -// Common constants used to evaluate simdSinf/cosf4/tanf4 -constexpr float SIMD_SINCOS_CC0 = -0.0013602249f; -constexpr float SIMD_SINCOS_CC1 = 0.0416566950f; -constexpr float SIMD_SINCOS_CC2 = -0.4999990225f; -constexpr float SIMD_SINCOS_SC0 = -0.0001950727f; -constexpr float SIMD_SINCOS_SC1 = 0.0083320758f; -constexpr float SIMD_SINCOS_SC2 = -0.1666665247f; -constexpr float SIMD_SINCOS_KC1 = 1.57079625129f; -constexpr float SIMD_SINCOS_KC2 = 7.54978995489e-8f; - -// Shorthand functions to get the unit vectors as __m128 -__forceinline __m128 m128_unit_1000() { return _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f); } -__forceinline __m128 m128_unit_0100() { return _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f); } -__forceinline __m128 m128_unit_0010() { return _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f); } -__forceinline __m128 m128_unit_0001() { return _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f); } - -// ======================================================== -// Internal helper types and functions -// ======================================================== - -// These have to be macros because _MM_SHUFFLE() requires compile-time constants. -#define m128_ror(vec, i) (((i) % 4) ? (_mm_shuffle_ps(vec, vec, _MM_SHUFFLE((uint32_t)(i + 3) % 4, (uint32_t)(i + 2) % 4, (uint32_t)(i + 1) % 4, (uint32_t)(i + 0) % 4))) : (vec)) -#define m128_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e)) -#define m128_sld(vec, vec2, x) m128_ror(vec, ((x) / 4)) - -__forceinline __m128 m128_mul_add(__m128 a, __m128 b, __m128 c) -{ - return _mm_add_ps(c, _mm_mul_ps(a, b)); -} - -__forceinline __m128 m128_mul_sub(__m128 a, __m128 b, __m128 c) -{ - return _mm_sub_ps(c, _mm_mul_ps(a, b)); -} - -__forceinline __m128 m128_merge_hi(__m128 a, __m128 b) -{ - return _mm_unpacklo_ps(a, b); -} - -__forceinline __m128 m128_merge_lo(__m128 a, __m128 b) -{ - return _mm_unpackhi_ps(a, b); -} - -__forceinline __m128 m128_select(__m128 a, __m128 b, __m128 mask) -{ - return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); -} - -__forceinline __m128 m128_negatef(__m128 x) -{ - return _mm_sub_ps(_mm_setzero_ps(), x); -} - -__forceinline __m128 m128_fabsf(__m128 x) -{ - return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF))); - //return _mm_andnot_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); -} - -__forceinline __m128 m128_acosf(__m128 x) -{ - const __m128 xabs = m128_fabsf(x); - const __m128 select = _mm_cmplt_ps(x, _mm_setzero_ps()); - const __m128 t1 = _mm_sqrt_ps(_mm_sub_ps(_mm_set1_ps(1.0f), xabs)); - - /* Instruction counts can be reduced if the polynomial was - * computed entirely from nested (dependent) fma's. However, - * to reduce the number of pipeline stalls, the polygon is evaluated - * in two halves (hi and lo). - */ - const __m128 xabs2 = _mm_mul_ps(xabs, xabs); - const __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); - - const __m128 hi = m128_mul_add(m128_mul_add(m128_mul_add(_mm_set1_ps(-0.0012624911f), - xabs, _mm_set1_ps( 0.0066700901f)), - xabs, _mm_set1_ps(-0.0170881256f)), - xabs, _mm_set1_ps( 0.0308918810f)); - - const __m128 lo = m128_mul_add(m128_mul_add(m128_mul_add(_mm_set1_ps(-0.0501743046f), - xabs, _mm_set1_ps( 0.0889789874f)), - xabs, _mm_set1_ps(-0.2145988016f)), - xabs, _mm_set1_ps( 1.5707963050f)); - - const __m128 result = m128_mul_add(hi, xabs4, lo); - - // Adjust the result if x is negative. - return m128_select( - _mm_mul_ps(t1, result), // Positive - m128_mul_sub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative - select - ); -} - -__forceinline __m128 m128_cosf(__m128 x) -{ - // Range reduction - // Find the quadrant the angle falls in - // Algo: - // xl = angle * TwoOverPi; - // q = (int32_t)(ceil(abs(xl)) * sign(xl)) - const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); - const __m128i offset_cos = _mm_add_epi32(_mm_set1_epi32(1), offset_sin); - - // Remainder in range [-pi/4..pi/4] - const __m128 qf = _mm_cvtepi32_ps(q); - const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); - - // Compute x^2 and x^3 - const __m128 xl2 = _mm_mul_ps(xl, xl); - const __m128 xl3 = _mm_mul_ps(xl2, xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - const __m128 cx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), - xl2, _mm_set1_ps(1.0f)); - - const __m128 sx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), - xl3, xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - __m128 cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - - __m128 cos = m128_select(cx, sx, cos_mask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - - // Write the result - return m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), cos), cos, cos_mask); -} - -__forceinline __m128 m128_sinf(__m128 x) -{ - // Range reduction - // Find the quadrant the angle falls in - // Algo: - // xl = angle * TwoOverPi; - // q = (int32_t)(ceil(abs(xl)) * sign(xl)) - const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); - - // Remainder in range [-pi/4..pi/4] - const __m128 qf = _mm_cvtepi32_ps(q); - const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); - - // Compute x^2 and x^3 - const __m128 xl2 = _mm_mul_ps(xl, xl); - const __m128 xl3 = _mm_mul_ps(xl2, xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - const __m128 cx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), - xl2, _mm_set1_ps(1.0f)); - - const __m128 sx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), - xl3, xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - __m128 sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - - __m128 sin = m128_select(cx, sx, sin_mask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - - // Write the result - return m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), sin), sin, sin_mask); -} - -__forceinline void m128_sinf_cosf(__m128 x, __m128* out_sin, __m128* out_cos) -{ - // Range reduction - // Find the quadrant the angle falls in - // Algo: - // xl = angle * TwoOverPi; - // q = (int32_t)(ceil(abs(xl)) * sign(xl)) - const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); - - // Compute the offset based on the quadrant that the angle falls in. - // Add 1 to the offset for the cosine. - const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); - const __m128i offset_cos = _mm_add_epi32(_mm_set1_epi32(1), offset_sin); - - // Remainder in range [-pi/4..pi/4] - const __m128 qf = _mm_cvtepi32_ps(q); - const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); - - // Compute x^2 and x^3 - const __m128 xl2 = _mm_mul_ps(xl, xl); - const __m128 xl3 = _mm_mul_ps(xl2, xl); - - // Compute both the sin and cos of the angles - // using a polynomial expression: - // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) - // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) - const __m128 cx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), - xl2, _mm_set1_ps(1.0f)); - - const __m128 sx = - m128_mul_add( - m128_mul_add( - m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), - xl3, xl); - - // Use the cosine when the offset is odd and the sin - // when the offset is even - __m128 sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - __m128 cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x1)), _mm_setzero_si128())); - - __m128 sin = m128_select(cx, sx, sin_mask); - __m128 cos = m128_select(cx, sx, cos_mask); - - // Flip the sign of the result when (offset mod 4) = 1 or 2 - sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x2)), _mm_setzero_si128())); - - // Write the result - *out_sin = m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), sin), sin, sin_mask); - *out_cos = m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), cos), cos, cos_mask); -} - // ------------------------------------------------------------- // Constructors // ------------------------------------------------------------- -/// Create a new vector 2D -__forceinline vec2 vec2_new(float x, float y) -{ - vec2 result = { x, y }; - return result; -} - -/// Create a new vector 2D, with components have same value -__forceinline vec2 vec2_new1(float s) -{ - vec2 result = { s, s }; - return result; -} - -/// Create a new vector 2D from a vector 3D -__forceinline vec2 vec2_from_vec3(vec3 v) -{ - return v.xy; -} - /// Create a new vector 3D __forceinline vec3 vec3_new(float x, float y, float z) { @@ -440,76 +164,6 @@ __forceinline mat4 mat4_load(const float ptr[]) // Operators-like functions // ------------------------------------------------------------- -__forceinline vec2 vec2_neg(vec2 v) -{ - return vec2_new(-v.x, -v.y); -} - -__forceinline vec2 vec2_add(vec2 a, vec2 b) -{ - return vec2_new(a.x + b.x, a.y + b.y); -} - -__forceinline vec2 vec2_sub(vec2 a, vec2 b) -{ - return vec2_new(a.x - b.x, a.y - b.y); -} - -__forceinline vec2 vec2_mul(vec2 a, vec2 b) -{ - return vec2_new(a.x * b.x, a.y * b.y); -} - -__forceinline vec2 vec2_div(vec2 a, vec2 b) -{ - return vec2_new(a.x / b.x, a.y / b.y); -} - -__forceinline vec2 vec2_add1(vec2 a, float b) -{ - return vec2_add(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_sub1(vec2 a, float b) -{ - return vec2_sub(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_mul1(vec2 a, float b) -{ - return vec2_mul(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_div1(vec2 a, float b) -{ - return vec2_div(a, vec2_new1(b)); -} - -__forceinline vec2 vec2_mul_add(vec2 a, vec2 b, vec2 c) -{ - return vec2_add(c, vec2_mul(a, b)); -} - -__forceinline vec2 vec2_mul_sub(vec2 a, vec2 b, vec2 c) -{ - return vec2_sub(c, vec2_mul(a, b)); -} - -__forceinline bool vec2_equal(vec2 a, vec2 b) -{ - return a.x == b.x && a.y == b.y; -} - -__forceinline bool vec2_not_equal(vec2 a, vec2 b) -{ - return a.x != b.x || a.y != b.y; -} - -__forceinline bool vec2_isclose(vec2 a, vec2 b) -{ - return float_isclose(a.x, b.x) && float_isclose(a.y, b.y); -} - __forceinline vec3 vec3_neg(vec3 v) { return vec3_from_m128(_mm_sub_ps(_mm_setzero_ps(), v.m128)); @@ -696,303 +350,6 @@ __forceinline bool mat4_not_equal(mat4 a, mat4 b) // Functions // ------------------------------------------------------------- -/// Computes sign of 'x' -__forceinline ivec2 vec2_sign(vec2 v) -{ - ivec2 result = { float_sign(v.x), float_sign(v.y) }; - return result; -} - -/// Computes absolute value -__forceinline vec2 vec2_abs(vec2 v) -{ - return vec2_new(fabsf(v.x), fabsf(v.y)); -} - -/// Computes cosine -__forceinline vec2 vec2_cos(vec2 v) -{ - return vec2_new(cosf(v.x), cosf(v.y)); -} - -/// Computes sine -__forceinline vec2 vec2_sin(vec2 v) -{ - return vec2_new(sinf(v.x), sinf(v.y)); -} - -/// Computes tangent -__forceinline vec2 vec2_tan(vec2 v) -{ - return vec2_new(tanf(v.x), tanf(v.y)); -} - -/// Computes hyperbolic cosine -__forceinline vec2 vec2_cosh(vec2 v) -{ - return vec2_new(coshf(v.x), coshf(v.y)); -} - -/// Computes hyperbolic sine -__forceinline vec2 vec2_sinh(vec2 v) -{ - return vec2_new(sinhf(v.x), sinhf(v.y)); -} - -/// Computes hyperbolic tangent -__forceinline vec2 vec2_tanh(vec2 v) -{ - return vec2_new(tanhf(v.x), tanhf(v.y)); -} - -/// Computes inverse cosine -__forceinline vec2 vec2_acos(vec2 v) -{ - return vec2_new(acosf(v.x), acosf(v.y)); -} - -/// Computes inverse sine -__forceinline vec2 vec2_asin(vec2 v) -{ - return vec2_new(asinf(v.x), asinf(v.y)); -} - -/// Computes inverse tangent -__forceinline vec2 vec2_atan(vec2 v) -{ - return vec2_new(atanf(v.x), atanf(v.y)); -} - -/// Computes inverse tangent with 2 args -__forceinline vec2 vec2_atan2(vec2 a, vec2 b) -{ - return vec2_new(atan2f(a.x, b.x), atan2f(a.y, b.y)); -} - -/// Computes Euler number raised to the power 'x' -__forceinline vec2 vec2_exp(vec2 v) -{ - return vec2_new(expf(v.x), expf(v.y)); -} - -/// Computes 2 raised to the power 'x' -__forceinline vec2 vec2_exp2(vec2 v) -{ - return vec2_new(exp2f(v.x), exp2f(v.y)); -} - -/// Computes the base Euler number logarithm -__forceinline vec2 vec2_log(vec2 v) -{ - return vec2_new(logf(v.x), logf(v.y)); -} - -/// Computes the base 2 logarithm -__forceinline vec2 vec2_log2(vec2 v) -{ - return vec2_new(log2f(v.x), log2f(v.y)); -} - -/// Computes the base 10 logarithm -__forceinline vec2 vec2_log10(vec2 v) -{ - return vec2_new(log10f(v.x), log10f(v.y)); -} - -/// Computes the value of base raised to the power exponent -__forceinline vec2 vec2_pow(vec2 a, vec2 b) -{ - return vec2_new(powf(a.x, b.x), powf(a.y, b.y)); -} - -/// Get the fractal part of floating point -__forceinline vec2 vec2_frac(vec2 v) -{ - return vec2_new(float_frac(v.x), float_frac(v.y)); -} - -/// Computes the floating-point remainder of the division operation x/y -__forceinline vec2 vec2_fmod(vec2 a, vec2 b) -{ - return vec2_new(fmodf(a.x, b.x), fmodf(a.y, b.y)); -} - -/// Computes the smallest integer value not less than 'x' -__forceinline vec2 vec2_ceil(vec2 v) -{ - return vec2_new(ceilf(v.x), ceilf(v.y)); -} - -/// Computes the largest integer value not greater than 'x' -__forceinline vec2 vec2_floor(vec2 v) -{ - return vec2_new(floorf(v.x), floorf(v.y)); -} - -/// Computes the nearest integer value -__forceinline vec2 vec2_round(vec2 v) -{ - return vec2_new(roundf(v.x), roundf(v.y)); -} - -/// Computes the nearest integer not greater in magnitude than 'x' -__forceinline vec2 vec2_trunc(vec2 v) -{ - return vec2_new(truncf(v.x), truncf(v.y)); -} - -/// Get the smaller value -__forceinline vec2 vec2_min(vec2 a, vec2 b) -{ - return vec2_new(float_min(a.x, b.x), float_min(a.y, b.y)); -} - -/// Get the larger value -__forceinline vec2 vec2_max(vec2 a, vec2 b) -{ - return vec2_new(float_max(a.x, b.x), float_max(a.y, b.y)); -} - -/// Clamps the 'x' value to the [min, max]. -__forceinline vec2 vec2_clamp(vec2 v, vec2 min, vec2 max) -{ - return vec2_new(float_clamp(v.x, min.x, max.x), float_clamp(v.y, min.y, max.y)); -} - -/// Clamps the specified value within the range of 0 to 1 -__forceinline vec2 vec2_saturate(vec2 v) -{ - return vec2_new(float_saturate(v.x), float_saturate(v.y)); -} - -/// Compares two values, returning 0 or 1 based on which value is greater. -__forceinline vec2 vec2_step(vec2 a, vec2 b) -{ - return vec2_new(float_step(a.x, b.x), float_step(a.y, b.y)); -} - -/// Performs a linear interpolation. -__forceinline vec2 vec2_lerp(vec2 a, vec2 b, vec2 t) -{ - return vec2_new(float_lerp(a.x, b.x, t.x), float_lerp(a.y, b.y, t.y)); -} - -/// Performs a linear interpolation. -__forceinline vec2 vec2_lerp1(vec2 a, vec2 b, float t) -{ - return vec2_new(float_lerp(a.x, b.x, t), float_lerp(a.y, b.y, t)); -} - -/// Compute a smooth Hermite interpolation -__forceinline vec2 vec2_smoothstep(vec2 a, vec2 b, vec2 t) -{ - return vec2_new(float_smoothstep(a.x, b.x, t.x), float_smoothstep(a.y, b.y, t.y)); -} - -/// Computes square root of 'x'. -__forceinline vec2 vec2_sqrt(vec2 v) -{ - return vec2_new(sqrtf(v.x), sqrtf(v.y)); -} - -/// Computes inverse square root of 'x'. -__forceinline vec2 vec2_rsqrt(vec2 v) -{ - return vec2_new(float_rsqrt(v.x), float_rsqrt(v.y)); -} - -/// Compute dot product of two vectors -__forceinline float vec2_dot(vec2 a, vec2 b) -{ - return a.x * b.x + a.y * b.y; -} - -/// Compute squared length of vector -__forceinline float vec2_lensqr(vec2 v) -{ - return vec2_dot(v, v); -} - -/// Compute length of vector -__forceinline float vec2_length(vec2 v) -{ - return float32_sqrt(vec2_lensqr(v)); -} - -/// Compute distance from 'a' to b -__forceinline float vec2_distance(vec2 a, vec2 b) -{ - return vec2_length(vec2_sub(a, b)); -} - -/// Compute squared distance from 'a' to b -__forceinline float vec2_distsqr(vec2 a, vec2 b) -{ - return vec2_lensqr(vec2_sub(a, b)); -} - -/// Compute normalized vector -__forceinline vec2 vec2_normalize(vec2 v) -{ -#ifndef VECTORMATH_USE_EXACT_PRECISION - return vec2_mul1(v, float32_fast_rsqrt(vec2_lensqr(v))); -#else - const float lsqr = vec2_lensqr(v); - if (lsqr > 0.0f) - { - const float f = 1.0f / sqrtf(lsqr); - return vec2_new(v.x * f, v.y * f); - } - else - { - return v; - } -#endif -} - -/// Compute reflection vector -__forceinline vec2 vec2_reflect(vec2 v, vec2 n) -{ - return vec2_sub(v, vec2_mul1(n, 2.0f * vec2_dot(v, n))); -} - -/// Compute refraction vector -__forceinline vec2 vec2_refract(vec2 v, vec2 n, float eta) -{ - const float k = 1.0f - eta * eta * (1.0f - vec2_dot(v, n) * vec2_dot(v, n)); - return k < 0.0f - ? vec2_new1(0.0f) - : vec2_sub(vec2_mul1(v, eta), vec2_mul1(v, (eta * vec2_dot(v, n) + sqrtf(k)))); -} - -/// Compute faceforward vector -__forceinline vec2 vec2_faceforward(vec2 n, vec2 i, vec2 nref) -{ - return vec2_dot(i, nref) < 0.0f ? n : vec2_neg(n); -} - -/// Compute angle vector -__forceinline float vec2_angle(vec2 v) -{ - return atan2f(v.y, v.x); -} - -/// Compute angle vector in degrees -__forceinline float vec2_angle_deg(vec2 v) -{ - return float_rad2deg(vec2_angle(v)); -} - -/// Create new vector with angle and length -__forceinline vec2 vec2_from_angle(float angle, float length) { - return vec2_new(cosf(angle) * length, sinf(angle) * length); -} - -/// Create new vector with angle in degrees, and length -__forceinline vec2 vec2_from_angle_deg(float angle, float length) { - return vec2_from_angle(float_rad2deg(angle), length); -} - /// Computes sign of 'x' __forceinline ivec3 vec3_sign(vec3 v) { diff --git a/include/vectormath/vectormath_simd_utils.h b/include/vectormath/vectormath_simd_utils.h new file mode 100644 index 0000000..2cc85c9 --- /dev/null +++ b/include/vectormath/vectormath_simd_utils.h @@ -0,0 +1,286 @@ +#pragma once + +#include "vectormath_types.h" + +// Make sure m128 is enable +#if !VECTORMATH_SIMD_ENABLE && !VECTORMATH_USE_CLANG_EXT +#error The vectormath support for simd is not enable, please use vector_math_scalar.h instead +#endif + +// ------------------------------------------------------------- +// Helper constants +// ------------------------------------------------------------- + +// Small epsilon value +constexpr float SIMD_SLERP_TOL = 0.999f; + +// Common constants used to evaluate simdSinf/cosf4/tanf4 +constexpr float SIMD_SINCOS_CC0 = -0.0013602249f; +constexpr float SIMD_SINCOS_CC1 = 0.0416566950f; +constexpr float SIMD_SINCOS_CC2 = -0.4999990225f; +constexpr float SIMD_SINCOS_SC0 = -0.0001950727f; +constexpr float SIMD_SINCOS_SC1 = 0.0083320758f; +constexpr float SIMD_SINCOS_SC2 = -0.1666665247f; +constexpr float SIMD_SINCOS_KC1 = 1.57079625129f; +constexpr float SIMD_SINCOS_KC2 = 7.54978995489e-8f; + + +// Shorthand functions to get the unit vectors as __m128 +__forceinline __m128 m128_unit_1000(void) { return _mm_setr_ps(1.0f, 0.0f, 0.0f, 0.0f); } +__forceinline __m128 m128_unit_0100(void) { return _mm_setr_ps(0.0f, 1.0f, 0.0f, 0.0f); } +__forceinline __m128 m128_unit_0010(void) { return _mm_setr_ps(0.0f, 0.0f, 1.0f, 0.0f); } +__forceinline __m128 m128_unit_0001(void) { return _mm_setr_ps(0.0f, 0.0f, 0.0f, 1.0f); } + + +// ======================================================== +// Internal helper types and functions +// ======================================================== + + +// These have to be macros because _MM_SHUFFLE() requires compile-time constants. +#define m128_ror(vec, i) (((i) % 4) ? (_mm_shuffle_ps(vec, vec, _MM_SHUFFLE((uint32_t)(i + 3) % 4, (uint32_t)(i + 2) % 4, (uint32_t)(i + 1) % 4, (uint32_t)(i + 0) % 4))) : (vec)) +#define m128_splat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e)) +#define m128_sld(vec, vec2, x) m128_ror(vec, ((x) / 4)) + + +__forceinline __m128 m128_mul_add(__m128 a, __m128 b, __m128 c) +{ + return _mm_add_ps(c, _mm_mul_ps(a, b)); +} + + +__forceinline __m128 m128_mul_sub(__m128 a, __m128 b, __m128 c) +{ + return _mm_sub_ps(c, _mm_mul_ps(a, b)); +} + + +__forceinline __m128 m128_merge_hi(__m128 a, __m128 b) +{ + return _mm_unpacklo_ps(a, b); +} + + +__forceinline __m128 m128_merge_lo(__m128 a, __m128 b) +{ + return _mm_unpackhi_ps(a, b); +} + + +__forceinline __m128 m128_select(__m128 a, __m128 b, __m128 mask) +{ + return _mm_or_ps(_mm_and_ps(mask, b), _mm_andnot_ps(mask, a)); +} + + +__forceinline __m128 m128_negatef(__m128 x) +{ + return _mm_sub_ps(_mm_setzero_ps(), x); +} + + +__forceinline __m128 m128_fabsf(__m128 x) +{ + return _mm_and_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x7FFFFFFF))); + //return _mm_andnot_ps(x, _mm_castsi128_ps(_mm_set1_epi32(0x80000000))); +} + + +__forceinline __m128 m128_acosf(__m128 x) +{ + const __m128 xabs = m128_fabsf(x); + const __m128 select = _mm_cmplt_ps(x, _mm_setzero_ps()); + const __m128 t1 = _mm_sqrt_ps(_mm_sub_ps(_mm_set1_ps(1.0f), xabs)); + + /* Instruction counts can be reduced if the polynomial was + * computed entirely from nested (dependent) fma's. However, + * to reduce the number of pipeline stalls, the polygon is evaluated + * in two halves (hi and lo). + */ + const __m128 xabs2 = _mm_mul_ps(xabs, xabs); + const __m128 xabs4 = _mm_mul_ps(xabs2, xabs2); + + const __m128 hi = m128_mul_add(m128_mul_add(m128_mul_add(_mm_set1_ps(-0.0012624911f), + xabs, _mm_set1_ps( 0.0066700901f)), + xabs, _mm_set1_ps(-0.0170881256f)), + xabs, _mm_set1_ps( 0.0308918810f)); + + const __m128 lo = m128_mul_add(m128_mul_add(m128_mul_add(_mm_set1_ps(-0.0501743046f), + xabs, _mm_set1_ps( 0.0889789874f)), + xabs, _mm_set1_ps(-0.2145988016f)), + xabs, _mm_set1_ps( 1.5707963050f)); + + const __m128 result = m128_mul_add(hi, xabs4, lo); + + // Adjust the result if x is negative. + return m128_select( + _mm_mul_ps(t1, result), // Positive + m128_mul_sub(t1, result, _mm_set1_ps(3.1415926535898f)), // Negative + select + ); +} + + +/// Compute cosf for SIMD 128 +__forceinline __m128 m128_cosf(__m128 x) +{ + // Range reduction + // Find the quadrant the angle falls in + // Algo: + // xl = angle * TwoOverPi; + // q = (int32_t)(ceil(abs(xl)) * sign(xl)) + const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); + const __m128i offset_cos = _mm_add_epi32(_mm_set1_epi32(1), offset_sin); + + // Remainder in range [-pi/4..pi/4] + const __m128 qf = _mm_cvtepi32_ps(q); + const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); + + // Compute x^2 and x^3 + const __m128 xl2 = _mm_mul_ps(xl, xl); + const __m128 xl3 = _mm_mul_ps(xl2, xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + const __m128 cx = + m128_mul_add( + m128_mul_add( + m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), + xl2, _mm_set1_ps(1.0f)); + + const __m128 sx = + m128_mul_add( + m128_mul_add( + m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), + xl3, xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + __m128 cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x1)), _mm_setzero_si128())); + + __m128 cos = m128_select(cx, sx, cos_mask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x2)), _mm_setzero_si128())); + + // Write the result + return m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), cos), cos, cos_mask); +} + + +/// Compute sinf for SIMD 128 +__forceinline __m128 m128_sinf(__m128 x) +{ + // Range reduction + // Find the quadrant the angle falls in + // Algo: + // xl = angle * TwoOverPi; + // q = (int32_t)(ceil(abs(xl)) * sign(xl)) + const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); + + // Remainder in range [-pi/4..pi/4] + const __m128 qf = _mm_cvtepi32_ps(q); + const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); + + // Compute x^2 and x^3 + const __m128 xl2 = _mm_mul_ps(xl, xl); + const __m128 xl3 = _mm_mul_ps(xl2, xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + const __m128 cx = + m128_mul_add( + m128_mul_add( + m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), + xl2, _mm_set1_ps(1.0f)); + + const __m128 sx = + m128_mul_add( + m128_mul_add( + m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), + xl3, xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + __m128 sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x1)), _mm_setzero_si128())); + + __m128 sin = m128_select(cx, sx, sin_mask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x2)), _mm_setzero_si128())); + + // Write the result + return m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), sin), sin, sin_mask); +} + + +/// Compute cosf and sinf for SIMD 128 in one function call +__forceinline void m128_sinf_cosf(__m128 x, __m128* out_sin, __m128* out_cos) +{ + // Range reduction + // Find the quadrant the angle falls in + // Algo: + // xl = angle * TwoOverPi; + // q = (int32_t)(ceil(abs(xl)) * sign(xl)) + const __m128i q = _mm_cvtps_epi32(_mm_mul_ps(x, _mm_set1_ps(0.63661977236f))); + + // Compute the offset based on the quadrant that the angle falls in. + // Add 1 to the offset for the cosine. + const __m128i offset_sin = _mm_and_si128(q, _mm_set1_epi32(0x3)); + const __m128i offset_cos = _mm_add_epi32(_mm_set1_epi32(1), offset_sin); + + // Remainder in range [-pi/4..pi/4] + const __m128 qf = _mm_cvtepi32_ps(q); + const __m128 xl = m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC2), m128_mul_sub(qf, _mm_set1_ps(SIMD_SINCOS_KC1), x)); + + // Compute x^2 and x^3 + const __m128 xl2 = _mm_mul_ps(xl, xl); + const __m128 xl3 = _mm_mul_ps(xl2, xl); + + // Compute both the sin and cos of the angles + // using a polynomial expression: + // cx = 1.0f + xl2 * ((C0 * xl2 + C1) * xl2 + C2) + // sx = xl + xl3 * ((S0 * xl2 + S1) * xl2 + S2) + const __m128 cx = + m128_mul_add( + m128_mul_add( + m128_mul_add(_mm_set1_ps(SIMD_SINCOS_CC0), xl2, _mm_set1_ps(SIMD_SINCOS_CC1)), xl2, _mm_set1_ps(SIMD_SINCOS_CC2)), + xl2, _mm_set1_ps(1.0f)); + + const __m128 sx = + m128_mul_add( + m128_mul_add( + m128_mul_add(_mm_set1_ps(SIMD_SINCOS_SC0), xl2, _mm_set1_ps(SIMD_SINCOS_SC1)), xl2, _mm_set1_ps(SIMD_SINCOS_SC2)), + xl3, xl); + + // Use the cosine when the offset is odd and the sin + // when the offset is even + __m128 sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x1)), _mm_setzero_si128())); + __m128 cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x1)), _mm_setzero_si128())); + + __m128 sin = m128_select(cx, sx, sin_mask); + __m128 cos = m128_select(cx, sx, cos_mask); + + // Flip the sign of the result when (offset mod 4) = 1 or 2 + sin_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_sin, _mm_set1_epi32(0x2)), _mm_setzero_si128())); + cos_mask = _mm_castsi128_ps(_mm_cmpeq_epi32(_mm_and_si128(offset_cos, _mm_set1_epi32(0x2)), _mm_setzero_si128())); + + // Write the result + *out_sin = m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), sin), sin, sin_mask); + *out_cos = m128_select(_mm_xor_ps(_mm_castsi128_ps(_mm_set1_epi32(0x80000000)), cos), cos, cos_mask); +} + +//! EOF + diff --git a/include/vectormath/vectormath_types.h b/include/vectormath/vectormath_types.h index 7998482..5563a5b 100644 --- a/include/vectormath/vectormath_types.h +++ b/include/vectormath/vectormath_types.h @@ -399,7 +399,7 @@ typedef union VECTORMATH_VECTORTYPE(mat4, 16) #endif static_assert(sizeof(vec2) == 8, "sizeof(vec2) must be 8 bytes"); -static_assert(sizeof(vec3) == 16, "sizeof(vec3) must be 12 bytes"); +static_assert(sizeof(vec3) == 16, "sizeof(vec3) must be 16 bytes"); static_assert(sizeof(vec4) == 16, "sizeof(vec4) must be 16 bytes"); static_assert(sizeof(ivec2) == 8, "sizeof(ivec2) must be 8 bytes"); diff --git a/premake5.lua b/premake5.lua index a089c90..7fa5c1b 100644 --- a/premake5.lua +++ b/premake5.lua @@ -101,11 +101,11 @@ end -- examples newoption { - trigger = "examples", + trigger = "with-examples", description = "Generate examples projects" } -if _OPTIONS["examples"] then +if _OPTIONS["with-examples"] then local function vectormathexample(name) vectormathproject(name) do diff --git a/scripts/vectormath.premake5.lua b/scripts/vectormath.premake5.lua index 520a613..c8803da 100644 --- a/scripts/vectormath.premake5.lua +++ b/scripts/vectormath.premake5.lua @@ -44,12 +44,16 @@ return { path.join(subincludedir, "stdmath_extensions.h"), path.join(subincludedir, "vectormath_types.h"), + path.join(subincludedir, "vectormath_common.h"), path.join(subincludedir, "vectormath_scalar.h"), + path.join(subincludedir, "vectormath_clang.h"), path.join(subincludedir, "vectormath_simd.h"), + path.join(subincludedir, "vectormath_simd_utils.h"), path.join(subincludedir, "vectormath_operators.h"), path.join(subincludedir, "vectormath_swizzles.h"), + path.join(subincludedir, "vectormath_generics.h"), path.join(includedir, "vectormath.h"), } diff --git a/unit_tests/projects/vectormath_unit_tests_android/app/build.gradle b/unit_tests/projects/vectormath_unit_tests_android/app/build.gradle index 545bcb3..ce8065c 100644 --- a/unit_tests/projects/vectormath_unit_tests_android/app/build.gradle +++ b/unit_tests/projects/vectormath_unit_tests_android/app/build.gradle @@ -22,7 +22,7 @@ android { } ndk { - abiFilters 'armeabi-v7a', 'arm64-v8a' + abiFilters 'armeabi-v7a', 'arm64-v8a', 'x86', 'x86_64' } } }