Skip to content

Commit

Permalink
The switch for sse1
Browse files Browse the repository at this point in the history
  • Loading branch information
Fedor committed Jan 7, 2024
1 parent 7b56d34 commit af8ad28
Show file tree
Hide file tree
Showing 41 changed files with 2,086 additions and 936 deletions.
2 changes: 1 addition & 1 deletion dom/base/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,7 @@ SOURCES += [

# Are we targeting x86-32 or x86-64? If so, we want to include SSE2 code for
# nsTextFragment.cpp
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += ['nsTextFragmentSSE2.cpp']
SOURCES['nsTextFragmentSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']

Expand Down
2 changes: 1 addition & 1 deletion dom/media/webaudio/blink/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ UNIFIED_SOURCES += [
]

# Are we targeting x86 or x64? If so, build SSE2 files.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
DEFINES['USE_SSE2'] = True

include('/ipc/chromium/chromium-config.mozbuild')
Expand Down
2 changes: 1 addition & 1 deletion dom/media/webaudio/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ if CONFIG['CPU_ARCH'] == 'aarch64' or CONFIG['BUILD_ARM_NEON']:
]

# Are we targeting x86 or x64? If so, build SSE2 files.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += ['AudioNodeEngineSSE2.cpp']
DEFINES['USE_SSE2'] = True
SOURCES['AudioNodeEngineSSE2.cpp'].flags += CONFIG['SSE2_FLAGS']
Expand Down
2 changes: 1 addition & 1 deletion gfx/2d/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ if CONFIG['MOZ_ENABLE_SKIA']:
]

# Are we targeting x86 or x64? If so, build SSE2 files.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += [
'BlurSSE2.cpp',
'FilterProcessingSSE2.cpp',
Expand Down
4 changes: 2 additions & 2 deletions gfx/angle/checkout/src/common/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,10 @@
# undef far
#endif

#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64)
#if defined(_MSC_VER) && !defined(_M_ARM) && !defined(_M_ARM64) && !defined(THE_SSE1)
# include <intrin.h>
# define ANGLE_USE_SSE
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__))
#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) && !defined(THE_SSE1)
# include <x86intrin.h>
# define ANGLE_USE_SSE
#endif
Expand Down
2 changes: 1 addition & 1 deletion gfx/angle/moz.build.common
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
AllowCompilerWarnings()

if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
CXXFLAGS += CONFIG['SSE2_FLAGS']
DEFINES['__NDK_FPABI__'] = ''
DEFINES['ANGLE_SKIP_DXGI_1_2_CHECK'] = True
Expand Down
4 changes: 3 additions & 1 deletion gfx/cairo/libpixman/src/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -86,8 +86,10 @@ use_sse2 = False
use_vmx = False
use_arm_simd_gcc = False
use_arm_neon_gcc = False
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
use_sse2 = True

if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['CPU_ARCH'] == 'x86':
if CONFIG['CC_TYPE'] == 'clang-cl':
use_mmx = True
Expand Down
4 changes: 2 additions & 2 deletions gfx/layers/basic/BasicCompositor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -490,7 +490,7 @@ static bool AttemptVideoScale(TextureSourceBasic* aSource,
const gfx::Rect& aRect,
const gfx::Rect& aClipRect, DrawTarget* aDest,
const DrawTarget* aBuffer) {
#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
if (!mozilla::supports_ssse3()) return false;
if (aNewTransform
.IsTranslation()) // unscaled painting should take the regular path
Expand Down Expand Up @@ -563,7 +563,7 @@ static bool AttemptVideoConvertAndScale(
WrappingTextureSourceYCbCrBasic* wrappingSource =
aSource->AsWrappingTextureSourceYCbCrBasic();
if (!wrappingSource) return false;
#ifdef MOZILLA_SSE_HAVE_CPUID_DETECTION
#if defined(MOZILLA_SSE_HAVE_CPUID_DETECTION) && !defined(THE_SSE1)
if (!mozilla::supports_ssse3()) // libyuv requests SSSE3 for fast YUV
// conversion.
return false;
Expand Down
4 changes: 3 additions & 1 deletion gfx/qcms/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,10 @@ if CONFIG['CC_TYPE'] in ('clang', 'gcc'):
use_sse1 = False
use_sse2 = False
use_altivec = False
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
use_sse2 = True

if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['CC_TYPE'] == 'clang-cl':
if CONFIG['OS_ARCH'] != 'WINNT' or CONFIG['CPU_ARCH'] != 'x86_64':
use_sse1 = True
Expand Down
128 changes: 23 additions & 105 deletions gfx/qcms/transform.c
Original file line number Diff line number Diff line change
Expand Up @@ -442,6 +442,7 @@ static void qcms_transform_data_graya_out_precache(qcms_transform *transform, un
}
}

#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
static void qcms_transform_data_rgb_out_lut_precache(qcms_transform *transform, unsigned char *src, unsigned char *dest, size_t length)
{
unsigned int i;
Expand Down Expand Up @@ -511,6 +512,7 @@ static void qcms_transform_data_rgba_out_lut_precache(qcms_transform *transform,
dest += RGBA_OUTPUT_COMPONENTS;
}
}
#endif

// Not used
/*
Expand Down Expand Up @@ -1011,87 +1013,6 @@ void qcms_transform_release(qcms_transform *t)
transform_free(t);
}

#ifdef X86
// Determine if we can build with SSE2 (this was partly copied from jmorecfg.h in
// mozilla/jpeg)
// -------------------------------------------------------------------------
#if defined(_M_IX86) && defined(_MSC_VER)
#define HAS_CPUID
/* Get us a CPUID function. Avoid clobbering EBX because sometimes it's the PIC
register - I'm not sure if that ever happens on windows, but cpuid isn't
on the critical path so we just preserve the register to be safe and to be
consistent with the non-windows version. */
/* Runs the CPUID instruction with `fxn` loaded into EAX and writes the
 * resulting EAX, EBX, ECX and EDX values through `a`, `b`, `c` and `d`. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {
/* Temporaries the inline assembly stores into before the values are copied
 * out through the caller's pointers. */
uint32_t a_, b_, c_, d_;
/* The first xchg parks the incoming EBX value in ESI; the final xchg swaps
 * it back, so EBX holds its original value when the asm block ends. */
__asm {
xchg ebx, esi
mov eax, fxn
cpuid
mov a_, eax
mov b_, ebx
mov c_, ecx
mov d_, edx
xchg ebx, esi
}
*a = a_;
*b = b_;
*c = c_;
*d = d_;
}
#elif (defined(__GNUC__) || defined(__SUNPRO_C)) && (defined(__i386__) || defined(__i386))
#define HAS_CPUID
/* Get us a CPUID function. We can't use ebx because it's the PIC register on
some platforms, so we use ESI instead and save ebx to avoid clobbering it. */
/* Runs the CPUID instruction with `fxn` loaded into EAX and writes the
 * resulting EAX, EBX, ECX and EDX values through `a`, `b`, `c` and `d`. */
static void cpuid(uint32_t fxn, uint32_t *a, uint32_t *b, uint32_t *c, uint32_t *d) {

uint32_t a_, b_, c_, d_;
/* EBX is swapped into ESI before CPUID and swapped back afterwards, which
 * restores EBX and leaves CPUID's EBX result in ESI; that is why b_ is bound
 * to the "S" (ESI) output constraint rather than to EBX. */
__asm__ __volatile__ ("xchgl %%ebx, %%esi; cpuid; xchgl %%ebx, %%esi;"
: "=a" (a_), "=S" (b_), "=c" (c_), "=d" (d_) : "a" (fxn));
*a = a_;
*b = b_;
*c = c_;
*d = d_;
}
#endif

// -------------------------Runtime SSEx Detection-----------------------------

/* MMX is always supported per
* Gecko v1.9.1 minimum CPU requirements */
#define SSE1_EDX_MASK (1UL << 25)
#define SSE2_EDX_MASK (1UL << 26)
#define SSE3_ECX_MASK (1UL << 0)

/* Reports the highest SSE level this code detects: 3, 2, 1, or 0 when no SSE
 * support is detected (or no detection method was compiled in). */
static int sse_version_available(void)
{
#if defined(__x86_64__) || defined(__x86_64) || defined(_M_AMD64)
	/* 64-bit x86 CPUs always have SSE2, which is known at build time.
	 * Returning a constant lets the compiler discard the SSE1 and
	 * non-SIMD branches in callers. */
	return 2;
#elif defined(HAS_CPUID)
	/* Detection result cached across calls; -1 means "not probed yet". */
	static int detected_level = -1;
	uint32_t eax, ebx, ecx, edx;

	if (detected_level != -1)
		return detected_level;

	detected_level = 0;
	/* CPUID function 1 supplies the ECX/EDX bits tested by the masks below. */
	cpuid(0x00000001, &eax, &ebx, &ecx, &edx);
	if (ecx & SSE3_ECX_MASK) {
		detected_level = 3;
	} else if (edx & SSE2_EDX_MASK) {
		detected_level = 2;
	} else if (edx & SSE1_EDX_MASK) {
		detected_level = 1;
	}

	return detected_level;
#else
	/* No CPUID helper is available in this build, so report no SSE. */
	return 0;
#endif
}
#endif

static const struct matrix bradford_matrix = {{ { 0.8951f, 0.2664f,-0.1614f},
{-0.7502f, 1.7135f, 0.0367f},
{ 0.0389f,-0.0685f, 1.0296f}},
Expand Down Expand Up @@ -1291,39 +1212,36 @@ qcms_transform* qcms_transform_create(
return NULL;
}
if (precache) {
#ifdef X86
if (sse_version_available() >= 2) {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;

#if !(defined(_MSC_VER) && defined(_M_AMD64))
/* Microsoft Compiler for x64 doesn't support MMX.
* SSE code uses MMX so that we disable on x64 */
} else
if (sse_version_available() >= 1) {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
#if defined(X86)
#if !defined(THE_SSE1)
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse2;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse2;

#elif !(defined(_MSC_VER) && defined(_M_AMD64)) || defined(THE_SSE1)
/* The Microsoft compiler for x64 doesn't support MMX.
 * The SSE1 code path uses MMX, so it is disabled on x64. */
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_sse1;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_sse1;
#endif
} else
#endif
#if (defined(__POWERPC__) || defined(__powerpc__) && !defined(__NO_FPRS__))
if (have_altivec()) {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_altivec;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_altivec;
} else
} else {

if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
}
#endif
{
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut_precache;
else
transform->transform_fn = qcms_transform_data_rgba_out_lut_precache;
}
} else {
if (in_type == QCMS_DATA_RGB_8)
transform->transform_fn = qcms_transform_data_rgb_out_lut;
Expand Down
2 changes: 1 addition & 1 deletion gfx/thebes/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ elif CONFIG['MOZ_WIDGET_TOOLKIT'] == 'windows':
]

# Are we targeting x86 or x64? If so, build gfxAlphaRecoverySSE2.cpp.
if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
SOURCES += ['gfxAlphaRecoverySSE2.cpp']
# The file uses SSE2 intrinsics, so it needs special compile flags on some
# compilers.
Expand Down
5 changes: 3 additions & 2 deletions gfx/ycbcr/moz.build
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,13 @@ UNIFIED_SOURCES += [
'yuv_row_table.cpp',
]

if CONFIG['INTEL_ARCHITECTURE']:
if CONFIG['INTEL_ARCHITECTURE'] and not CONFIG['THE_SSE1']:
# These files use MMX and SSE2 intrinsics, so they need special compile flags
# on some compilers.
SOURCES += ['yuv_convert_sse2.cpp']
SOURCES['yuv_convert_sse2.cpp'].flags += CONFIG['SSE2_FLAGS']

if CONFIG['INTEL_ARCHITECTURE']:
# MSVC doesn't support MMX when targeting AMD64.
if CONFIG['CC_TYPE'] == 'clang-cl':
if CONFIG['CPU_ARCH'] == 'x86':
Expand All @@ -33,7 +34,7 @@ if CONFIG['INTEL_ARCHITECTURE']:

if CONFIG['CC_TYPE'] == 'clang-cl':
if CONFIG['CPU_ARCH'] == 'x86_64' or \
(CONFIG['CPU_ARCH'] == 'x86' and CONFIG['CC_TYPE'] == 'clang-cl'):
(CONFIG['CPU_ARCH'] == 'x86' and CONFIG['CC_TYPE'] == 'clang-cl' and not CONFIG['THE_SSE1']):
SOURCES += [
'yuv_row_win64.cpp',
]
Expand Down
9 changes: 7 additions & 2 deletions js/src/old-configure.in
Original file line number Diff line number Diff line change
Expand Up @@ -579,11 +579,16 @@ case "$target" in
dnl more recent, so set that explicitly here unless another
dnl target arch has already been set.
changequote(,)
if test "$THE_SSE1" = 1; then
SSE2_FLAGS="-arch:SSE"
else
SSE2_FLAGS="-arch:SSE2"
fi
if test -z `echo $CFLAGS | grep -i [-/]arch:` ; then
CFLAGS="$CFLAGS -arch:SSE2"
CFLAGS="$CFLAGS $SSE2_FLAGS"
fi
if test -z `echo $CXXFLAGS | grep -i [-/]arch:` ; then
CXXFLAGS="$CXXFLAGS -arch:SSE2"
CXXFLAGS="$CXXFLAGS $SSE2_FLAGS"
fi
changequote([,])
fi
Expand Down
8 changes: 8 additions & 0 deletions media/libaom/config/win/ia32/config/aom_config.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,11 +69,19 @@
#define HAVE_MSA 0
#define HAVE_NEON 0
#define HAVE_SSE 1
#ifndef THE_SSE1
#define HAVE_SSE2 1
#define HAVE_SSE3 1
#define HAVE_SSE4_1 1
#define HAVE_SSE4_2 1
#define HAVE_SSSE3 1
#else
#define HAVE_SSE2 0
#define HAVE_SSE3 0
#define HAVE_SSE4_1 0
#define HAVE_SSE4_2 0
#define HAVE_SSSE3 0
#endif
#define HAVE_VSX 0
#define HAVE_WXWIDGETS 0
#define INCLUDE_INSTALL_DIR INSTALLDIR/include
Expand Down
Loading

0 comments on commit af8ad28

Please sign in to comment.