Skip to content

Commit

Permalink
Fix issues related to MXCSR register (#1060)
Browse files Browse the repository at this point in the history
  • Loading branch information
M-HT authored Sep 26, 2023
1 parent 24c93c2 commit 653aba8
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 4 deletions.
16 changes: 12 additions & 4 deletions simde/x86/sse.h
Original file line number Diff line number Diff line change
Expand Up @@ -262,6 +262,14 @@ enum {
SIMDE_MM_ROUND_TOWARD_ZERO = 0x6000
#endif
};
/* SIMDE_MM_ROUND_MASK: bit mask applied to a control/status value to keep
 * only the bits handed to SIMDE_MM_SET_ROUNDING_MODE. If the toolchain
 * already provides _MM_ROUND_MASK, that definition is reused; otherwise the
 * literal 0x6000 is used. */
#ifndef _MM_ROUND_MASK
#define SIMDE_MM_ROUND_MASK (0x6000)
#else
#define SIMDE_MM_ROUND_MASK _MM_ROUND_MASK
#endif
/* When native aliases are requested, also publish the mask under its
 * native name.
 * NOTE(review): if _MM_ROUND_MASK was already defined when the block above
 * ran, this alias would expand to itself -- presumably native aliases are
 * only enabled when the native header is absent; confirm. */
#ifdef SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES
#define _MM_ROUND_MASK SIMDE_MM_ROUND_MASK
#endif

#if defined(_MM_FROUND_TO_NEAREST_INT)
# define SIMDE_MM_FROUND_TO_NEAREST_INT _MM_FROUND_TO_NEAREST_INT
Expand Down Expand Up @@ -429,7 +437,7 @@ enum {
#endif

SIMDE_FUNCTION_ATTRIBUTES
unsigned int
uint32_t
SIMDE_MM_GET_ROUNDING_MODE(void) {
#if defined(SIMDE_X86_SSE_NATIVE)
return _MM_GET_ROUNDING_MODE();
Expand Down Expand Up @@ -477,7 +485,7 @@ SIMDE_MM_GET_ROUNDING_MODE(void) {

SIMDE_FUNCTION_ATTRIBUTES
void
SIMDE_MM_SET_ROUNDING_MODE(unsigned int a) {
SIMDE_MM_SET_ROUNDING_MODE(uint32_t a) {
#if defined(SIMDE_X86_SSE_NATIVE)
_MM_SET_ROUNDING_MODE(a);
#elif defined(SIMDE_HAVE_FENV_H)
Expand Down Expand Up @@ -531,7 +539,7 @@ SIMDE_MM_GET_FLUSH_ZERO_MODE (void) {
#endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
#define _MM_SET_FLUSH_ZERO_MODE(a) SIMDE_MM_SET_FLUSH_ZERO_MODE(a)
#define _MM_GET_FLUSH_ZERO_MODE(a) SIMDE_MM_GET_FLUSH_ZERO_MODE(a)
#endif

SIMDE_FUNCTION_ATTRIBUTES
Expand Down Expand Up @@ -566,7 +574,7 @@ simde_mm_setcsr (uint32_t a) {
#if defined(SIMDE_X86_SSE_NATIVE)
_mm_setcsr(a);
#else
SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(unsigned int, a));
SIMDE_MM_SET_ROUNDING_MODE(HEDLEY_STATIC_CAST(uint32_t, a & SIMDE_MM_ROUND_MASK));
#endif
}
#if defined(SIMDE_X86_SSE_ENABLE_NATIVE_ALIASES)
Expand Down
62 changes: 62 additions & 0 deletions test/x86/sse.c
Original file line number Diff line number Diff line change
Expand Up @@ -5766,6 +5766,67 @@ test_simde_MM_TRANSPOSE4_PS (SIMDE_MUNIT_TEST_ARGS) {
#endif
}

/* Checks that the rounding-mode and flush-zero-mode accessors do not
 * interfere with each other when the control register is written through
 * simde_mm_setcsr().
 *
 * For each of the four rounding modes, with flush-zero both off and on, the
 * test writes the combined value with simde_mm_setcsr() and then reads back
 * SIMDE_MM_GET_ROUNDING_MODE() and SIMDE_MM_GET_FLUSH_ZERO_MODE(). It then
 * asserts that:
 *   - the rounding mode read back is the same whether flush-zero was
 *     requested off or on, and
 *   - the flush-zero mode read back is the same for every rounding mode.
 *
 * Only this consistency is asserted; the values read back are never compared
 * against the constants that were written.
 *
 * Returns 0 after all assertions have been evaluated (presumably the
 * harness's "success" value -- confirm against the test framework). */
static int
test_simde_MXCSR (SIMDE_MUNIT_TEST_ARGS) {
/* Snapshot the current register value so it can be restored afterwards. */
uint32_t original_mxcsr = simde_mm_getcsr();
/* Bits this test manipulates: rounding mode plus flush-zero. */
uint32_t mask_rm_fzm = SIMDE_MM_ROUND_MASK | SIMDE_MM_FLUSH_ZERO_MASK;
/* Original value with those bits cleared; every write below ORs the
 * desired rounding/flush-zero constants into this base. */
uint32_t masked_mxcsr = original_mxcsr & ~mask_rm_fzm;

/* Read-back results, named <what>_<rounding mode>_<flush-zero setting>:
 * rm_* holds the rounding mode, fzm_* the flush-zero mode. */
uint32_t rm_nearest_off, fzm_nearest_off, rm_nearest_on, fzm_nearest_on;
uint32_t rm_down_off, fzm_down_off, rm_down_on, fzm_down_on;
uint32_t rm_up_off, fzm_up_off, rm_up_on, fzm_up_on;
uint32_t rm_zero_off, fzm_zero_off, rm_zero_on, fzm_zero_on;

/* Round-to-nearest, flush-zero off / on. */
simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_NEAREST | SIMDE_MM_FLUSH_ZERO_OFF);
rm_nearest_off = SIMDE_MM_GET_ROUNDING_MODE();
fzm_nearest_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();

simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_NEAREST | SIMDE_MM_FLUSH_ZERO_ON);
rm_nearest_on = SIMDE_MM_GET_ROUNDING_MODE();
fzm_nearest_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();

/* Round-down, flush-zero off / on. */
simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_DOWN | SIMDE_MM_FLUSH_ZERO_OFF);
rm_down_off = SIMDE_MM_GET_ROUNDING_MODE();
fzm_down_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();

simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_DOWN | SIMDE_MM_FLUSH_ZERO_ON);
rm_down_on = SIMDE_MM_GET_ROUNDING_MODE();
fzm_down_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();

/* Round-up, flush-zero off / on. */
simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_UP | SIMDE_MM_FLUSH_ZERO_OFF);
rm_up_off = SIMDE_MM_GET_ROUNDING_MODE();
fzm_up_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();

simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_UP | SIMDE_MM_FLUSH_ZERO_ON);
rm_up_on = SIMDE_MM_GET_ROUNDING_MODE();
fzm_up_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();

/* Round-toward-zero, flush-zero off / on. */
simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_TOWARD_ZERO | SIMDE_MM_FLUSH_ZERO_OFF);
rm_zero_off = SIMDE_MM_GET_ROUNDING_MODE();
fzm_zero_off = SIMDE_MM_GET_FLUSH_ZERO_MODE();

simde_mm_setcsr(masked_mxcsr | SIMDE_MM_ROUND_TOWARD_ZERO | SIMDE_MM_FLUSH_ZERO_ON);
rm_zero_on = SIMDE_MM_GET_ROUNDING_MODE();
fzm_zero_on = SIMDE_MM_GET_FLUSH_ZERO_MODE();

/* Put the saved value back before any assertion runs -- presumably so a
 * failing assertion cannot leave the modified state behind for later tests. */
simde_mm_setcsr(original_mxcsr);

/* The rounding mode read back must not depend on the flush-zero setting. */
simde_assert_equal_u32(rm_nearest_off, rm_nearest_on);
simde_assert_equal_u32(rm_down_off, rm_down_on);
simde_assert_equal_u32(rm_up_off, rm_up_on);
simde_assert_equal_u32(rm_zero_off, rm_zero_on);

/* With flush-zero off, the flush-zero mode read back must be the same for
 * every rounding mode (round-to-nearest is used as the reference). */
simde_assert_equal_u32(fzm_nearest_off, fzm_down_off);
simde_assert_equal_u32(fzm_nearest_off, fzm_up_off);
simde_assert_equal_u32(fzm_nearest_off, fzm_zero_off);

/* Same check with flush-zero on. */
simde_assert_equal_u32(fzm_nearest_on, fzm_down_on);
simde_assert_equal_u32(fzm_nearest_on, fzm_up_on);
simde_assert_equal_u32(fzm_nearest_on, fzm_zero_on);

return 0;
}

SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ps)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_set_ps1)
Expand Down Expand Up @@ -5915,6 +5976,7 @@ SIMDE_TEST_FUNC_LIST_BEGIN
SIMDE_TEST_FUNC_LIST_ENTRY(mm_stream_ps)
SIMDE_TEST_FUNC_LIST_ENTRY(mm_prefetch)
SIMDE_TEST_FUNC_LIST_ENTRY(MM_TRANSPOSE4_PS)
SIMDE_TEST_FUNC_LIST_ENTRY(MXCSR)
SIMDE_TEST_FUNC_LIST_END

#include <test/x86/test-x86-footer.h>

0 comments on commit 653aba8

Please sign in to comment.