Skip to content

Commit

Permalink
Make native code portable
Browse files Browse the repository at this point in the history
 * minimal macos fixes

Signed-off-by: Won-Kyu Park <[email protected]>
  • Loading branch information
rickardp authored and wkpark committed Feb 5, 2024
1 parent 259ad44 commit 3b3d32a
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 33 deletions.
2 changes: 1 addition & 1 deletion include/Algo-Direct-Common.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ struct DirectInfo
xi = xws;
}
else {
myassert(Gap==1, "if Gap>1 then X workspace must be provided");
myassert((Gap==1), "if Gap>1 then X workspace must be provided");
xi = x;
}

Expand Down
2 changes: 2 additions & 0 deletions include/Algo-Direct2.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ struct AlgoVecBase<I, T, A, typename std::enable_if<DirectAux::IsDirect2<A>::val
private:
typedef AlgoScalarBase<T, A> base_t;

#ifdef USE_SSE2
FORCE_INLINE
//NO_INLINE
void resolve(const FVec<SSE, float>& vz, const IVec<SSE, float>& bidx, uint32 *pr) const
Expand Down Expand Up @@ -135,6 +136,7 @@ struct AlgoVecBase<I, T, A, typename std::enable_if<DirectAux::IsDirect2<A>::val
pr[0] = u.ui32[0];
pr[1] = u.ui32[2];
}
#endif // USE_SSE2

#ifdef USE_AVX

Expand Down
33 changes: 32 additions & 1 deletion include/Portable.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,40 @@
#include <stdexcept>
#include <sstream>

// Architecture dispatch for the SIMD feature macros (USE_*).
// AArch64 branch: every x86 feature macro is cleared and __m128/__m128i/__m128d
// are given local definitions. All other targets: SSE2 is required and the
// optional x86 extensions are detected from the compiler's predefined macros.
#if defined(__aarch64__)
#ifdef __CUDACC__
#undef USE_NEON // Doesn't work with nvcc, undefined symbols
#else
#include <arm_neon.h>
#undef USE_NEON // Not yet implemented
#endif
#undef USE_AVX // x86_64 only
#undef USE_AVX2 // x86_64 only
#undef USE_SSE2 // x86_64 only
#undef USE_SSE41 // x86_64 only
#undef USE_SSE42 // x86_64 only
#undef USE_FMA // x86_64 only
// NOTE(review): USE_NEON is #undef'd on both paths above, so as written the
// NEON typedefs in the first branch below are never compiled; only the plain
// struct definitions in the #else branch are active.
#ifdef USE_NEON
typedef float32x4_t __m128;
typedef int32x4_t __m128i;
typedef float64x2_t __m128d;
#else
// Plain aggregates with the same lane counts as the x86 types (4 floats,
// 4 ints, 2 doubles) -- presumably so declarations that name __m128 and
// friends still have a type to refer to on AArch64; TODO confirm against users.
typedef struct {float a; float b; float c; float d;} __m128;
typedef struct {int a; int b; int c; int d;} __m128i;
typedef struct {double a; double b;} __m128d;
#endif
#else
#undef USE_NEON // ARM64 only
#ifdef __FMA__
#define USE_FMA
#endif
// MSVC is exempt from the __SSE2__ test -- presumably because it does not
// predefine that macro; every other compiler must advertise SSE2.
#if !defined(__SSE2__) && !defined(_MSC_VER)
#error Compiler must support SSE2
#endif
#define USE_SSE2

// NOTE(review): this nested test sits inside the #else of the outer
// `#if defined(__aarch64__)`, so its condition is always false here and the
// #else branch is always the one taken.
#if defined(__aarch64__)
#else
#ifdef __AVX2__
#define USE_AVX2
#endif
Expand All @@ -24,7 +54,8 @@
#ifdef __SSE4_2__
#define USE_SSE42
#endif

#endif
#endif

#ifndef _MSC_VER
#include <stdint.h>
Expand Down
77 changes: 47 additions & 30 deletions include/SIMD.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,46 @@

#include "Portable.h"

#ifdef USE_SSE2
#include <emmintrin.h>
#if defined(USE_AVX) || defined(USE_AVX2)
#include <immintrin.h>
#else
#ifdef USE_SSE41
#include <smmintrin.h>
#endif
#endif
#endif

namespace BinSearch {
namespace Details {

template <InstrSet I, typename T>
struct FTOITraits{};

template <InstrSet I, class T>
struct FVec;

template <InstrSet I, class T>
struct IVec;

template <InstrSet I, class T>
struct FVec1;

template <> struct InstrFloatTraits<Scalar, float>
{
typedef __m128 vec_t;
};

template <> struct InstrFloatTraits<Scalar, double>
{
typedef __m128d vec_t;
};

}
}

#if !defined(__aarch64__)
#ifdef USE_SSE42
#ifndef _MSC_VER
#include <popcntintrin.h>
Expand All @@ -26,29 +66,11 @@ FORCE_INLINE int popcnt32(int x32)
} // namespace
#endif

#if defined(USE_AVX) || defined(USE_AVX2)
#include <immintrin.h>
#else
#include <emmintrin.h>
#ifdef USE_SSE41
#include <smmintrin.h>
#endif
#endif

#include "Type.h"

namespace BinSearch {
namespace Details {

template <InstrSet I, class T>
struct FVec;

template <InstrSet I, class T>
struct IVec;

template <InstrSet I, class T>
struct FVec1;

template <> struct InstrIntTraits<SSE>
{
typedef __m128i vec_t;
Expand All @@ -64,18 +86,8 @@ template <> struct InstrFloatTraits<SSE, double>
typedef __m128d vec_t;
};

template <> struct InstrFloatTraits<Scalar, float>
{
typedef float vec_t;
};

template <> struct InstrFloatTraits<Scalar, double>
{
typedef double vec_t;
};

template <InstrSet I, typename T>
struct FTOITraits
template <>
struct FTOITraits<SSE, float>
{
typedef IVec<SSE, float> vec_t;
};
Expand Down Expand Up @@ -295,9 +307,11 @@ FORCE_INLINE FVec<SSE,float> operator- (const FVec<SSE,float>& a, const FVec<
// Lane-wise product of two packed-float vectors.
FORCE_INLINE FVec<SSE,float> operator*(const FVec<SSE,float>& a, const FVec<SSE,float>& b)
{
    return _mm_mul_ps(a, b);
}

// Lane-wise quotient of two packed-float vectors.
FORCE_INLINE FVec<SSE,float> operator/(const FVec<SSE,float>& a, const FVec<SSE,float>& b)
{
    return _mm_div_ps(a, b);
}

// Converts each float lane to a 32-bit integer via _mm_cvttps_epi32.
FORCE_INLINE IVec<SSE,float> ftoi(const FVec<SSE,float>& a)
{
    return _mm_cvttps_epi32(a);
}

#ifndef __clang__ // Conflicts with builtin operator
// Each comparison yields the SSE compare mask, reinterpreted (not converted)
// as an integer vector.
FORCE_INLINE IVec<SSE,float> operator<=(const FVec<SSE,float>& a, const FVec<SSE,float>& b)
{
    const __m128 mask = _mm_cmple_ps(a, b);
    return _mm_castps_si128(mask);
}

FORCE_INLINE IVec<SSE,float> operator>=(const FVec<SSE,float>& a, const FVec<SSE,float>& b)
{
    const __m128 mask = _mm_cmpge_ps(a, b);
    return _mm_castps_si128(mask);
}

FORCE_INLINE IVec<SSE,float> operator<(const FVec<SSE,float>& a, const FVec<SSE,float>& b)
{
    const __m128 mask = _mm_cmplt_ps(a, b);
    return _mm_castps_si128(mask);
}
#endif

#ifdef USE_FMA
// Fused multiply-subtract through _mm_fmsub_ps; only available with USE_FMA.
FORCE_INLINE FVec<SSE, float> mulSub(const FVec<SSE, float>& a, const FVec<SSE, float>& b, const FVec<SSE, float>& c)
{
    return _mm_fmsub_ps(a, b, c);
}
#endif
Expand Down Expand Up @@ -349,9 +363,11 @@ FORCE_INLINE FVec<SSE,double> operator- (const FVec<SSE,double>& a, const FVec
// Lane-wise product of two packed-double vectors.
FORCE_INLINE FVec<SSE,double> operator*(const FVec<SSE,double>& a, const FVec<SSE,double>& b)
{
    return _mm_mul_pd(a, b);
}

// Lane-wise quotient of two packed-double vectors.
FORCE_INLINE FVec<SSE,double> operator/(const FVec<SSE,double>& a, const FVec<SSE,double>& b)
{
    return _mm_div_pd(a, b);
}

// Converts each double lane to a 32-bit integer via _mm_cvttpd_epi32. The
// result is carried in the IVec<SSE,float> wrapper, as in the original code.
FORCE_INLINE IVec<SSE,float> ftoi(const FVec<SSE,double>& a)
{
    return _mm_cvttpd_epi32(a);
}

#ifndef __clang__ // Conflicts with builtin operator
// Each comparison yields the SSE2 compare mask, reinterpreted (not converted)
// as an integer vector.
FORCE_INLINE IVec<SSE,double> operator<=(const FVec<SSE,double>& a, const FVec<SSE,double>& b)
{
    const __m128d mask = _mm_cmple_pd(a, b);
    return _mm_castpd_si128(mask);
}

FORCE_INLINE IVec<SSE,double> operator<(const FVec<SSE,double>& a, const FVec<SSE,double>& b)
{
    const __m128d mask = _mm_cmplt_pd(a, b);
    return _mm_castpd_si128(mask);
}

FORCE_INLINE IVec<SSE,double> operator>=(const FVec<SSE,double>& a, const FVec<SSE,double>& b)
{
    const __m128d mask = _mm_cmpge_pd(a, b);
    return _mm_castpd_si128(mask);
}
#endif

#ifdef USE_FMA
// Fused multiply-subtract through _mm_fmsub_pd; only available with USE_FMA.
FORCE_INLINE FVec<SSE, double> mulSub(const FVec<SSE, double>& a, const FVec<SSE, double>& b, const FVec<SSE, double>& c )
{
    return _mm_fmsub_pd(a, b, c);
}
#endif
Expand Down Expand Up @@ -570,3 +586,4 @@ FORCE_INLINE FVec<AVX, double> mulSub(const FVec<AVX, double>& a, const FVec<AVX

} // namespace Details
} // namespace BinSearch
#endif // !defined(__aarch64__)
2 changes: 1 addition & 1 deletion include/Type.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ using std::size_t;

namespace BinSearch {

enum InstrSet { Scalar, SSE, AVX };
// Identifies an instruction set. Enumerators keep their implicit values
// (Scalar = 0, SSE = 1, AVX = 2, Neon = 3); new entries belong at the end.
enum InstrSet
{
    Scalar,
    SSE,
    AVX,
    Neon
};

#define ALGOENUM(x, b) x,
enum Algos
Expand Down

0 comments on commit 3b3d32a

Please sign in to comment.