Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

x86: better implementations for MSVC and others without SIMDE_STATEMENT_EXPR_ #1221

Merged
merged 1 commit into from
Sep 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 19 additions & 5 deletions simde/x86/avx.h
Original file line number Diff line number Diff line change
Expand Up @@ -2083,7 +2083,11 @@ simde_mm256_round_ps (simde__m256 a, const int rounding) {
simde__m256_private
r_,
a_ = simde__m256_to_private(a);

#if SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(SIMDE_STATEMENT_EXPR_)
for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
SIMDE_CONSTIFY_16_(simde_mm_round_ps, r_.m128[i], (HEDLEY_UNREACHABLE(), simde_mm_undefined_ps()), rounding, a_.m128[i]);
}
#else
switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
#if defined(simde_math_nearbyintf)
case SIMDE_MM_FROUND_CUR_DIRECTION:
Expand Down Expand Up @@ -2128,7 +2132,7 @@ simde_mm256_round_ps (simde__m256 a, const int rounding) {
default:
HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_ps());
}

#endif
return simde__m256_from_private(r_);
}
#if defined(SIMDE_X86_AVX_NATIVE)
Expand Down Expand Up @@ -2157,6 +2161,11 @@ simde_mm256_round_pd (simde__m256d a, const int rounding) {
simde__m256d_private
r_,
a_ = simde__m256d_to_private(a);
#if SIMDE_NATURAL_VECTOR_SIZE_LE(128) && !defined(SIMDE_STATEMENT_EXPR_)
for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
SIMDE_CONSTIFY_16_(simde_mm_round_pd, r_.m128d[i], (HEDLEY_UNREACHABLE(), simde_mm_undefined_pd()), rounding, a_.m128d[i]);
}
#else

switch (rounding & ~SIMDE_MM_FROUND_NO_EXC) {
#if defined(simde_math_nearbyint)
Expand Down Expand Up @@ -2202,7 +2211,7 @@ simde_mm256_round_pd (simde__m256d a, const int rounding) {
default:
HEDLEY_UNREACHABLE_RETURN(simde_mm256_undefined_pd());
}

#endif
return simde__m256d_from_private(r_);
}
#if defined(SIMDE_X86_AVX_NATIVE)
Expand Down Expand Up @@ -2894,6 +2903,11 @@ simde_mm256_cmp_ps
a_ = simde__m256_to_private(a),
b_ = simde__m256_to_private(b);

#if defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128)
for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
SIMDE_CONSTIFY_32_(simde_mm_cmp_ps, r_.m128[i], (HEDLEY_UNREACHABLE(), simde_mm_undefined_ps()), imm8, a_.m128[i], b_.m128[i]);
}
#else
switch (imm8) {
case SIMDE_CMP_EQ_OQ:
case SIMDE_CMP_EQ_OS:
Expand Down Expand Up @@ -3076,7 +3090,7 @@ simde_mm256_cmp_ps
default:
HEDLEY_UNREACHABLE();
}

#endif
return simde__m256_from_private(r_);
}
#if defined(__clang__) && defined(__AVX512DQ__)
Expand All @@ -3098,7 +3112,7 @@ simde_mm256_cmp_ps
simde_mm256_cmp_ps_r; \
}))
#elif defined(SIMDE_X86_AVX_NATIVE)
-#define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps(a, b, imm8)
+#define simde_mm256_cmp_ps(a, b, imm8) _mm256_cmp_ps((a), (b), (imm8))
#elif defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128)
#define simde_mm256_cmp_ps(a, b, imm8) SIMDE_STATEMENT_EXPR_(({ \
simde__m256_private \
Expand Down
16 changes: 12 additions & 4 deletions simde/x86/avx512/cmp.h
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,11 @@ simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8)
r_,
a_ = simde__m512_to_private(a),
b_ = simde__m512_to_private(b);

#if !defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128)
for (size_t i = 0 ; i < (sizeof(r_.m128) / sizeof(r_.m128[0])) ; i++) {
SIMDE_CONSTIFY_32_(simde_mm_cmp_ps, r_.m128[i], simde_mm_undefined_ps(), imm8, a_.m128[i], b_.m128[i]);
}
#else
switch (imm8) {
case SIMDE_CMP_EQ_OQ:
case SIMDE_CMP_EQ_OS:
Expand Down Expand Up @@ -431,7 +435,7 @@ simde_mm512_cmp_ps_mask (simde__m512 a, simde__m512 b, const int imm8)
default:
HEDLEY_UNREACHABLE();
}

#endif
return simde_mm512_movepi32_mask(simde_mm512_castps_si512(simde__m512_from_private(r_)));
}
#if defined(SIMDE_X86_AVX512F_NATIVE)
Expand Down Expand Up @@ -496,7 +500,11 @@ simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8)
r_,
a_ = simde__m512d_to_private(a),
b_ = simde__m512d_to_private(b);

#if !defined(SIMDE_STATEMENT_EXPR_) && SIMDE_NATURAL_VECTOR_SIZE_LE(128)
for (size_t i = 0 ; i < (sizeof(r_.m128d) / sizeof(r_.m128d[0])) ; i++) {
SIMDE_CONSTIFY_32_(simde_mm_cmp_pd, r_.m128d[i], simde_mm_undefined_pd(), imm8, a_.m128d[i], b_.m128d[i]);
}
#else
switch (imm8) {
case SIMDE_CMP_EQ_OQ:
case SIMDE_CMP_EQ_OS:
Expand Down Expand Up @@ -679,7 +687,7 @@ simde_mm512_cmp_pd_mask (simde__m512d a, simde__m512d b, const int imm8)
default:
HEDLEY_UNREACHABLE();
}

#endif
return simde_mm512_movepi64_mask(simde_mm512_castpd_si512(simde__m512d_from_private(r_)));
}
#if defined(SIMDE_X86_AVX512F_NATIVE)
Expand Down
12 changes: 6 additions & 6 deletions simde/x86/sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -664,7 +664,7 @@ simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
r_.f32[i] = simde_math_nearbyintf(a_.f32[i]);
}
#else
-HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
+HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());
#endif
break;

Expand All @@ -683,7 +683,7 @@ simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
r_.f32[i] = simde_math_roundevenf(a_.f32[i]);
}
#else
-HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
+HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());
#endif
break;

Expand All @@ -702,7 +702,7 @@ simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
r_.f32[i] = simde_math_floorf(a_.f32[i]);
}
#else
-HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
+HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());
#endif
break;

Expand All @@ -721,7 +721,7 @@ simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
r_.f32[i] = simde_math_ceilf(a_.f32[i]);
}
#else
-HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
+HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());
#endif
break;

Expand All @@ -740,12 +740,12 @@ simde_x_mm_round_ps (simde__m128 a, int rounding, int lax_rounding)
r_.f32[i] = simde_math_truncf(a_.f32[i]);
}
#else
-HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
+HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());
#endif
break;

default:
-HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_pd());
+HEDLEY_UNREACHABLE_RETURN(simde_mm_undefined_ps());
}

return simde__m128_from_private(r_);
Expand Down