From 0800521cb3171422613653ef95246bfff592c767 Mon Sep 17 00:00:00 2001
From: Alex Shepeliev
Date: Fri, 18 Oct 2024 11:51:31 +0300
Subject: [PATCH] Upgrade Kotlin to v2.0.21 and other deps (#145)

* Upgrade Kotlin to 2.0.21
* Fix _Float16 error for iosX64 target
---
 build.gradle.kts                            |    3 +
 buildSrc/build.gradle.kts                   |    8 -
 buildSrc/settings.gradle.kts                |   13 -
 .../src/main/kotlin/KotlinAndroidTarget.kt  |    7 -
 .../kotlin/KotlinMultiplatformExtension.kt  |   15 -
 .../kotlin/webrtc.multiplatform.gradle.kts  |   37 -
 gradle/libs.versions.toml                   |   27 +-
 sample/composeApp/build.gradle.kts          |   25 +-
 .../iosApp/iosApp.xcodeproj/project.pbxproj |   36 +-
 vfsoverlay/base.h                           |  151 +
 vfsoverlay/common.h                         | 4525 ++++++++++
 vfsoverlay/conversion.h                     | 2032 +++++
 vfsoverlay/extern.h                         |   49 +
 vfsoverlay/geometry.h                       | 1100 +++
 vfsoverlay/logic.h                          | 1315 +++
 vfsoverlay/math.h                           | 5996 +++++++++++++
 vfsoverlay/matrix.h                         | 1990 +++++
 vfsoverlay/matrix_types.h                   |  525 ++
 vfsoverlay/overlay.yaml                     |   28 +
 vfsoverlay/packed.h                         | 1031 +++
 vfsoverlay/quaternion.h                     | 1194 +++
 vfsoverlay/simd.h                           |   30 +
 vfsoverlay/types.h                          |  128 +
 vfsoverlay/vector.h                         |   52 +
 vfsoverlay/vector_make.h                    | 7874 +++++++++++++++++
 vfsoverlay/vector_types.h                   | 1281 +++
 webrtc-kmp/build.gradle.kts                 |   34 +-
 27 files changed, 29376 insertions(+), 130 deletions(-)
 delete mode 100644 buildSrc/build.gradle.kts
 delete mode 100644 buildSrc/settings.gradle.kts
 delete mode 100644 buildSrc/src/main/kotlin/KotlinAndroidTarget.kt
 delete mode 100644 buildSrc/src/main/kotlin/KotlinMultiplatformExtension.kt
 delete mode 100644 buildSrc/src/main/kotlin/webrtc.multiplatform.gradle.kts
 create mode 100644 vfsoverlay/base.h
 create mode 100644 vfsoverlay/common.h
 create mode 100644 vfsoverlay/conversion.h
 create mode 100644 vfsoverlay/extern.h
 create mode 100644 vfsoverlay/geometry.h
 create mode 100644 vfsoverlay/logic.h
 create mode 100644 vfsoverlay/math.h
 create mode 100644 vfsoverlay/matrix.h
 create mode 100644 vfsoverlay/matrix_types.h
 create mode 100644 vfsoverlay/overlay.yaml
 create mode 100644 vfsoverlay/packed.h
 create mode 100644 vfsoverlay/quaternion.h
 create mode 100644 vfsoverlay/simd.h
 create mode 100644 vfsoverlay/types.h
 create mode 100644 vfsoverlay/vector.h
 create mode 100644 vfsoverlay/vector_make.h
 create mode 100644 vfsoverlay/vector_types.h

diff --git a/build.gradle.kts b/build.gradle.kts
index 2a90d3cf..3cb8db84 100644
--- a/build.gradle.kts
+++ b/build.gradle.kts
@@ -3,6 +3,9 @@ import java.util.Properties
 plugins {
     alias(libs.plugins.ktlint)
     alias(libs.plugins.nexus)
+    alias(libs.plugins.kotlinMultiplatform) apply false
+    alias(libs.plugins.androidApplication) apply false
+    alias(libs.plugins.androidLibrary) apply false
     alias(libs.plugins.jetbrains.compose) apply false
 }
 
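Note on the build-logic migration: the catalog-declared plugin aliases added above (applied with `apply false` at the root) together with the Kotlin 2.x compilerOptions DSL replace the buildSrc convention plugin and helper functions deleted just below. A minimal sketch of the equivalent per-module configuration, under those assumptions (the module and the exact target set are illustrative, not taken from this patch):

    import org.jetbrains.kotlin.gradle.ExperimentalKotlinGradlePluginApi
    import org.jetbrains.kotlin.gradle.dsl.JvmTarget

    plugins {
        alias(libs.plugins.kotlinMultiplatform)
        alias(libs.plugins.androidLibrary)
    }

    kotlin {
        @OptIn(ExperimentalKotlinGradlePluginApi::class)
        androidTarget {
            // replaces the deleted KotlinAndroidTarget.configureJvmTarget("1.8")
            compilerOptions {
                jvmTarget.set(JvmTarget.JVM_1_8)
            }
        }

        // replaces the deleted KotlinMultiplatformExtension.configureKotlinCompilerArgs(),
        // which used the kotlinOptions DSL that Kotlin 2.x deprecates
        targets.configureEach {
            compilations.configureEach {
                compileTaskProvider.configure {
                    compilerOptions.freeCompilerArgs.addAll(
                        "-opt-in=kotlin.RequiresOptIn",
                        "-Xexpect-actual-classes",
                    )
                }
            }
        }
    }

Keeping these flags in each module's build script is what allows deleting the whole buildSrc build (including its settings.gradle.kts) in the hunks that follow.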
diff --git a/buildSrc/build.gradle.kts b/buildSrc/build.gradle.kts
deleted file mode 100644
index 618f6814..00000000
--- a/buildSrc/build.gradle.kts
+++ /dev/null
@@ -1,8 +0,0 @@
-plugins {
-    `kotlin-dsl`
-}
-
-dependencies {
-    implementation(libs.kotlin.plugin)
-    implementation(libs.agp.plugin)
-}
diff --git a/buildSrc/settings.gradle.kts b/buildSrc/settings.gradle.kts
deleted file mode 100644
index 75df2751..00000000
--- a/buildSrc/settings.gradle.kts
+++ /dev/null
@@ -1,13 +0,0 @@
-dependencyResolutionManagement {
-    versionCatalogs {
-        create("libs") {
-            from(files("../gradle/libs.versions.toml"))
-        }
-    }
-
-    repositories {
-        gradlePluginPortal()
-        mavenCentral()
-        google()
-    }
-}
diff --git a/buildSrc/src/main/kotlin/KotlinAndroidTarget.kt b/buildSrc/src/main/kotlin/KotlinAndroidTarget.kt
deleted file mode 100644
index 4acc43b1..00000000
--- a/buildSrc/src/main/kotlin/KotlinAndroidTarget.kt
+++ /dev/null
@@ -1,7 +0,0 @@
-import org.jetbrains.kotlin.gradle.plugin.mpp.KotlinAndroidTarget
-
-fun KotlinAndroidTarget.configureJvmTarget(jvmVersion: String = "1.8") {
-    compilations.all {
-        kotlinOptions.jvmTarget = jvmVersion
-    }
-}
diff --git a/buildSrc/src/main/kotlin/KotlinMultiplatformExtension.kt b/buildSrc/src/main/kotlin/KotlinMultiplatformExtension.kt
deleted file mode 100644
index 00627579..00000000
--- a/buildSrc/src/main/kotlin/KotlinMultiplatformExtension.kt
+++ /dev/null
@@ -1,15 +0,0 @@
-import org.jetbrains.kotlin.gradle.dsl.KotlinMultiplatformExtension
-
-fun KotlinMultiplatformExtension.configureKotlinCompilerArgs(vararg args: String) {
-    targets.all {
-        compilations.all {
-            kotlinOptions {
-                freeCompilerArgs += setOf(
-                    "-opt-in=kotlin.RequiresOptIn",
-                    "-Xexpect-actual-classes",
-                    *args
-                )
-            }
-        }
-    }
-}
diff --git a/buildSrc/src/main/kotlin/webrtc.multiplatform.gradle.kts b/buildSrc/src/main/kotlin/webrtc.multiplatform.gradle.kts
deleted file mode 100644
index 747520de..00000000
--- a/buildSrc/src/main/kotlin/webrtc.multiplatform.gradle.kts
+++ /dev/null
@@ -1,37 +0,0 @@
-plugins {
-    id("com.android.library")
-    kotlin("multiplatform")
-}
-
-kotlin {
-    configureKotlinCompilerArgs()
-
-    androidTarget {
-        configureJvmTarget()
-    }
-}
-
-android {
-    compileSdk = androidCompileSdkVersion
-
-    sourceSets["main"].manifest.srcFile("src/androidMain/AndroidManifest.xml")
-    sourceSets["main"].res.srcDir("src/androidMain/res")
-
-    defaultConfig {
-        minSdk = androidMinSdkVersion
-    }
-
-    compileOptions {
-        sourceCompatibility = JavaVersion.VERSION_1_8
-        targetCompatibility = JavaVersion.VERSION_1_8
-    }
-}
-
-private val Project.versionCatalog: VersionCatalog
-    get() = extensions.getByType<VersionCatalogsExtension>().named("libs")
-
-private val Project.androidCompileSdkVersion: Int
-    get() = "${versionCatalog.findVersion("compileSdk").get()}".toInt()
-
-private val Project.androidMinSdkVersion: Int
-    get() = "${versionCatalog.findVersion("minSdk").get()}".toInt()
diff --git a/gradle/libs.versions.toml b/gradle/libs.versions.toml
index b41632a8..24bd1a7f 100644
--- a/gradle/libs.versions.toml
+++ b/gradle/libs.versions.toml
@@ -1,15 +1,15 @@
 [versions]
-kotlin = "2.0.10"
-kotlin-coroutines = "1.8.0"
+kotlin = "2.0.21"
+kotlin-coroutines = "1.9.0"
 androidx-activity-compose = "1.9.0"
-androidx-appcompat = "1.6.1"
-androidx-core = "1.13.0"
-androidx-material = "1.11.0"
-androidx-lifecycle = "2.7.0"
-androidx-startup = "1.1.1"
-androidx-test-core = "1.5.0"
-androidx-test-runner = "1.5.2"
-androidx-test-rules = "1.5.0"
+androidx-appcompat = "1.7.0"
+androidx-core = "1.13.1"
+androidx-material = "1.12.0"
+androidx-lifecycle = "2.8.6"
+androidx-startup = "1.2.0"
+androidx-test-core = "1.6.1"
+androidx-test-runner = "1.6.2"
+androidx-test-rules = "1.6.1"
 accompanist-permision = "0.34.0"
 kermit = "2.0.3"
 kotlin-wrappers = "1.0.0-pre.732"
@@ -49,12 +49,11 @@ kotlin-wrappers-react = { module = "org.jetbrains.kotlin-wrappers:kotlin-react" }
 kotlin-wrappers-reactDom = { module = "org.jetbrains.kotlin-wrappers:kotlin-react-dom" }
 kotlin-wrappers-mui = { module = "org.jetbrains.kotlin-wrappers:kotlin-mui-material" }
 
-# Plugin dependencies
-kotlin-plugin = { module = "org.jetbrains.kotlin:kotlin-gradle-plugin", version.ref = "kotlin" }
-agp-plugin = { module = "com.android.tools.build:gradle", version.ref = "agp" }
-
 [plugins]
 ktlint = { id = "org.jlleitschuh.gradle.ktlint", version.ref = "ktlint" }
 nexus = { id =
"io.github.gradle-nexus.publish-plugin", version.ref = "nexus" } jetbrains-compose = { id = "org.jetbrains.compose", version.ref = "compose-plugin" } compose-compiler = { id = "org.jetbrains.kotlin.plugin.compose", version.ref = "kotlin" } +kotlinMultiplatform = { id = "org.jetbrains.kotlin.multiplatform", version.ref = "kotlin" } +androidApplication = { id = "com.android.application", version.ref = "agp" } +androidLibrary = { id = "com.android.library", version.ref = "agp" } diff --git a/sample/composeApp/build.gradle.kts b/sample/composeApp/build.gradle.kts index 724adf97..837f7579 100644 --- a/sample/composeApp/build.gradle.kts +++ b/sample/composeApp/build.gradle.kts @@ -1,21 +1,18 @@ -import org.jetbrains.kotlin.gradle.targets.js.dsl.ExperimentalWasmDsl +import org.jetbrains.kotlin.gradle.ExperimentalKotlinGradlePluginApi +import org.jetbrains.kotlin.gradle.ExperimentalWasmDsl +import org.jetbrains.kotlin.gradle.dsl.JvmTarget import org.jetbrains.kotlin.gradle.targets.js.webpack.KotlinWebpackConfig -import org.jetbrains.kotlin.gradle.plugin.mpp.KotlinNativeTarget import org.jetbrains.kotlin.gradle.plugin.mpp.NativeBuildType - plugins { - kotlin("multiplatform") - id("com.android.application") - kotlin("native.cocoapods") - + alias(libs.plugins.kotlinMultiplatform) + alias(libs.plugins.androidApplication) alias(libs.plugins.jetbrains.compose) alias(libs.plugins.compose.compiler) + kotlin("native.cocoapods") } kotlin { - configureKotlinCompilerArgs() - cocoapods { version = "1.0" summary = "Compose app" @@ -26,6 +23,7 @@ kotlin { version = libs.versions.webrtc.ios.sdk.get() moduleName = "WebRTC" packageName = "WebRTC" + linkOnly = true } podfile = project.file("../iosApp/Podfile") @@ -40,8 +38,11 @@ kotlin { xcodeConfigurationToNativeBuildType["CUSTOM_RELEASE"] = NativeBuildType.RELEASE } + @OptIn(ExperimentalKotlinGradlePluginApi::class) androidTarget { - configureJvmTarget() + compilerOptions { + jvmTarget = JvmTarget.JVM_1_8 + } } iosX64() @@ -134,7 +135,3 @@ android { debugImplementation(compose.uiTooling) } } - -compose.experimental { - web.application {} -} diff --git a/sample/iosApp/iosApp.xcodeproj/project.pbxproj b/sample/iosApp/iosApp.xcodeproj/project.pbxproj index ad7acba3..b8d2c6b6 100644 --- a/sample/iosApp/iosApp.xcodeproj/project.pbxproj +++ b/sample/iosApp/iosApp.xcodeproj/project.pbxproj @@ -125,8 +125,8 @@ 7555FF77242A565900829871 /* Sources */, B92378962B6B1156000C7307 /* Frameworks */, 7555FF79242A565900829871 /* Resources */, - 931F3CF07987B7B6B6CCC6ED /* [CP] Copy Pods Resources */, - F3D8C08FB2F2F9B33C43EA14 /* [CP] Embed Pods Frameworks */, + 9D30E8BF2257B72354B2C936 /* [CP] Embed Pods Frameworks */, + 02E3B32A3BA45C3A2057CD5D /* [CP] Copy Pods Resources */, ); buildRules = ( ); @@ -184,46 +184,46 @@ /* End PBXResourcesBuildPhase section */ /* Begin PBXShellScriptBuildPhase section */ - 5BCD90D29F45499A62B89A70 /* [CP] Check Pods Manifest.lock */ = { + 02E3B32A3BA45C3A2057CD5D /* [CP] Copy Pods Resources */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; files = ( ); inputFileListPaths = ( + "${PODS_ROOT}/Target Support Files/Pods-iosApp/Pods-iosApp-resources-${CONFIGURATION}-input-files.xcfilelist", ); - inputPaths = ( - "${PODS_PODFILE_DIR_PATH}/Podfile.lock", - "${PODS_ROOT}/Manifest.lock", - ); - name = "[CP] Check Pods Manifest.lock"; + name = "[CP] Copy Pods Resources"; outputFileListPaths = ( - ); - outputPaths = ( - "$(DERIVED_FILE_DIR)/Pods-iosApp-checkManifestLockResult.txt", + "${PODS_ROOT}/Target Support 
Files/Pods-iosApp/Pods-iosApp-resources-${CONFIGURATION}-output-files.xcfilelist", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; + shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-iosApp/Pods-iosApp-resources.sh\"\n"; showEnvVarsInLog = 0; }; - 931F3CF07987B7B6B6CCC6ED /* [CP] Copy Pods Resources */ = { + 5BCD90D29F45499A62B89A70 /* [CP] Check Pods Manifest.lock */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; files = ( ); inputFileListPaths = ( - "${PODS_ROOT}/Target Support Files/Pods-iosApp/Pods-iosApp-resources-${CONFIGURATION}-input-files.xcfilelist", ); - name = "[CP] Copy Pods Resources"; + inputPaths = ( + "${PODS_PODFILE_DIR_PATH}/Podfile.lock", + "${PODS_ROOT}/Manifest.lock", + ); + name = "[CP] Check Pods Manifest.lock"; outputFileListPaths = ( - "${PODS_ROOT}/Target Support Files/Pods-iosApp/Pods-iosApp-resources-${CONFIGURATION}-output-files.xcfilelist", + ); + outputPaths = ( + "$(DERIVED_FILE_DIR)/Pods-iosApp-checkManifestLockResult.txt", ); runOnlyForDeploymentPostprocessing = 0; shellPath = /bin/sh; - shellScript = "\"${PODS_ROOT}/Target Support Files/Pods-iosApp/Pods-iosApp-resources.sh\"\n"; + shellScript = "diff \"${PODS_PODFILE_DIR_PATH}/Podfile.lock\" \"${PODS_ROOT}/Manifest.lock\" > /dev/null\nif [ $? != 0 ] ; then\n # print error to STDERR\n echo \"error: The sandbox is not in sync with the Podfile.lock. Run 'pod install' or update your CocoaPods installation.\" >&2\n exit 1\nfi\n# This output is used by Xcode 'outputs' to avoid re-running this script phase.\necho \"SUCCESS\" > \"${SCRIPT_OUTPUT_FILE_0}\"\n"; showEnvVarsInLog = 0; }; - F3D8C08FB2F2F9B33C43EA14 /* [CP] Embed Pods Frameworks */ = { + 9D30E8BF2257B72354B2C936 /* [CP] Embed Pods Frameworks */ = { isa = PBXShellScriptBuildPhase; buildActionMask = 2147483647; files = ( diff --git a/vfsoverlay/base.h b/vfsoverlay/base.h new file mode 100644 index 00000000..41b0bd27 --- /dev/null +++ b/vfsoverlay/base.h @@ -0,0 +1,151 @@ +/*! @header + * This header defines macros used in the implementation of + * types and functions. Even though they are exposed in a public header, + * the macros defined in this header are implementation details, and you + * should not use or rely on them. They may be changed or removed entirely + * in a future release. + * + * @copyright 2016-2017 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_BASE +#define SIMD_BASE + +/* Define __has_attribute and __has_include if they aren't available */ +# ifndef __has_attribute +# define __has_attribute(__x) 0 +# endif +# ifndef __has_include +# define __has_include(__x) 0 +# endif +# ifndef __has_feature +# define __has_feature(__x) 0 +# endif + +# if __has_attribute(__ext_vector_type__) && __has_attribute(__overloadable__) +# define SIMD_COMPILER_HAS_REQUIRED_FEATURES 1 +# else +/* Your compiler is missing one or more features that are hard requirements + * for any support. None of the types or functions defined by + * the simd headers will be available. 
*/
+# define SIMD_COMPILER_HAS_REQUIRED_FEATURES 0
+# endif
+
+# if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+# if __has_include(<TargetConditionals.h>) && __has_include(<Availability.h>)
+# include <TargetConditionals.h>
+# include <Availability.h>
+/* A number of new features are added in newer releases; most of these are
+ * inline in the header, which makes them available even when targeting older
+ * OS versions. Those that make external calls, however, are only available
+ * when targeting the release in which they became available. Because of the
+ * way in which simd functions are overloaded, the usual weak-linking tricks
+ * do not work; these functions are simply unavailable when targeting older
+ * versions of the library. */
+# if TARGET_OS_RTKIT
+# define SIMD_LIBRARY_VERSION 5
+# elif __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_13_0 || \
+ __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_16_0 || \
+ __WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_9_0 || \
+ __TV_OS_VERSION_MIN_REQUIRED >= __TVOS_16_0 || \
+ __BRIDGE_OS_VERSION_MIN_REQUIRED >= 70000 || \
+ __DRIVERKIT_VERSION_MIN_REQUIRED >= __DRIVERKIT_22_0
+# define SIMD_LIBRARY_VERSION 5
+# elif __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_12_0 || \
+ __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_15_0 || \
+ __WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_8_0 || \
+ __TV_OS_VERSION_MIN_REQUIRED >= __TVOS_15_0 || \
+ __BRIDGE_OS_VERSION_MIN_REQUIRED >= 60000 || \
+ __DRIVERKIT_VERSION_MIN_REQUIRED >= __DRIVERKIT_21_0
+# define SIMD_LIBRARY_VERSION 4
+# elif __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_13 || \
+ __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_11_0 || \
+ __WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_4_0 || \
+ __TV_OS_VERSION_MIN_REQUIRED >= __TVOS_11_0 || \
+ __DRIVERKIT_VERSION_MIN_REQUIRED >= __DRIVERKIT_19_0
+# define SIMD_LIBRARY_VERSION 3
+# elif __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_12 || \
+ __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_10_0 || \
+ __WATCH_OS_VERSION_MIN_REQUIRED >= __WATCHOS_3_0 || \
+ __TV_OS_VERSION_MIN_REQUIRED >= __TVOS_10_0
+# define SIMD_LIBRARY_VERSION 2
+# elif __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_10 || \
+ __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_8_0
+# define SIMD_LIBRARY_VERSION 1
+# else
+# define SIMD_LIBRARY_VERSION 0
+# endif
+# else /* !__has_include(<TargetConditionals.h>) && __has_include(<Availability.h>) */
+# define SIMD_LIBRARY_VERSION 5
+# define __API_AVAILABLE(...) /* Nothing */
+# endif
+
+/* The simd types interoperate with the native simd intrinsic types for each
+ * architecture; the headers that define those types and operations are
+ * automatically included with simd.h */
+# if defined __ARM_NEON__
+# include <arm_neon.h>
+# elif defined __i386__ || defined __x86_64__
+# include <immintrin.h>
+# endif
+
+/* Define a number of function attributes used by the simd functions.
*/ +# if __has_attribute(__always_inline__) +# define SIMD_INLINE __attribute__((__always_inline__)) +# else +# define SIMD_INLINE inline +# endif + +# if __has_attribute(__const__) +# define SIMD_CONST __attribute__((__const__)) +# else +# define SIMD_CONST /* nothing */ +# endif + +# if __has_attribute(__nodebug__) +# define SIMD_NODEBUG __attribute__((__nodebug__)) +# else +# define SIMD_NODEBUG /* nothing */ +# endif + +# if __has_attribute(__deprecated__) +# define SIMD_DEPRECATED(message) __attribute__((__deprecated__(message))) +# else +# define SIMD_DEPRECATED(message) /* nothing */ +# endif + +#define SIMD_OVERLOAD __attribute__((__overloadable__)) +#define SIMD_CPPFUNC SIMD_INLINE SIMD_CONST SIMD_NODEBUG +#define SIMD_CFUNC SIMD_CPPFUNC SIMD_OVERLOAD +#define SIMD_NOINLINE SIMD_CONST SIMD_NODEBUG SIMD_OVERLOAD +#define SIMD_NONCONST SIMD_INLINE SIMD_NODEBUG SIMD_OVERLOAD +#define __SIMD_INLINE__ SIMD_CPPFUNC +#define __SIMD_ATTRIBUTES__ SIMD_CFUNC +#define __SIMD_OVERLOAD__ SIMD_OVERLOAD + +# if __has_feature(cxx_constexpr) +# define SIMD_CONSTEXPR constexpr +# else +# define SIMD_CONSTEXPR /* nothing */ +# endif + +# if __has_feature(cxx_noexcept) +# define SIMD_NOEXCEPT noexcept +# else +# define SIMD_NOEXCEPT /* nothing */ +# endif + +#if defined __cplusplus +/*! @abstract A boolean scalar. */ +typedef bool simd_bool; +#else +/*! @abstract A boolean scalar. */ +typedef _Bool simd_bool; +#endif +/*! @abstract A boolean scalar. + * @discussion This type is deprecated; In C or Objective-C sources, use + * `_Bool` instead. In C++ sources, use `bool`. */ +typedef simd_bool __SIMD_BOOLEAN_TYPE__; + +# endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* defined SIMD_BASE */ diff --git a/vfsoverlay/common.h b/vfsoverlay/common.h new file mode 100644 index 00000000..cb2f6503 --- /dev/null +++ b/vfsoverlay/common.h @@ -0,0 +1,4525 @@ +/*! @header + * The interfaces declared in this header provide "common" elementwise + * operations that are neither math nor logic functions. These are available + * only for floating-point vectors and scalars, except for min, max, abs, + * clamp, and the reduce operations, which also support integer vectors. + * + * simd_abs(x) Absolute value of x. Also available as fabs + * for floating-point vectors. If x is the + * smallest signed integer, x is returned. + * + * simd_max(x,y) Returns the maximum of x and y. Also available + * as fmax for floating-point vectors. + * + * simd_min(x,y) Returns the minimum of x and y. Also available + * as fmin for floating-point vectors. + * + * simd_clamp(x,min,max) x clamped to the range [min, max]. + * + * simd_sign(x) -1 if x is less than zero, 0 if x is zero or + * NaN, and +1 if x is greater than zero. + * + * simd_mix(x,y,t) If t is not in the range [0,1], the result is + * simd_lerp(x,y,t) undefined. Otherwise the result is x+(y-x)*t, + * which linearly interpolates between x and y. + * + * simd_recip(x) An approximation to 1/x. If x is very near the + * limits of representable values, or is infinity + * or NaN, the result is undefined. There are + * two variants of this function: + * + * simd_precise_recip(x) + * + * and + * + * simd_fast_recip(x). + * + * The "precise" variant is accurate to a few ULPs, + * whereas the "fast" variant may have as little + * as 11 bits of accuracy in float and about 22 + * bits in double. 
+ * + * The function simd_recip(x) resolves to + * simd_precise_recip(x) ordinarily, but to + * simd_fast_recip(x) when used in a translation + * unit compiled with -ffast-math (when + * -ffast-math is in effect, you may still use the + * precise version of this function by calling it + * explicitly by name). + * + * simd_rsqrt(x) An approximation to 1/sqrt(x). If x is + * infinity or NaN, the result is undefined. + * There are two variants of this function: + * + * simd_precise_rsqrt(x) + * + * and + * + * simd_fast_rsqrt(x). + * + * The "precise" variant is accurate to a few ULPs, + * whereas the "fast" variant may have as little + * as 11 bits of accuracy in float and about 22 + * bits in double. + * + * The function simd_rsqrt(x) resolves to + * simd_precise_rsqrt(x) ordinarily, but to + * simd_fast_rsqrt(x) when used in a translation + * unit compiled with -ffast-math (when + * -ffast-math is in effect, you may still use the + * precise version of this function by calling it + * explicitly by name). + * + * simd_fract(x) The "fractional part" of x, which lies strictly + * in the range [0, 0x1.fffffep-1]. + * + * simd_step(edge,x) 0 if x < edge, and 1 otherwise. + * + * simd_smoothstep(edge0,edge1,x) 0 if x <= edge0, 1 if x >= edge1, and + * a Hermite interpolation between 0 and 1 if + * edge0 < x < edge1. + * + * simd_reduce_add(x) Sum of the elements of x. + * + * simd_reduce_min(x) Minimum of the elements of x. + * + * simd_reduce_max(x) Maximum of the elements of x. + * + * simd_equal(x,y) True if and only if every lane of x is equal + * to the corresponding lane of y. + * + * The following common functions are available in the simd:: namespace: + * + * C++ Function Equivalent C Function + * -------------------------------------------------------------------- + * simd::abs(x) simd_abs(x) + * simd::max(x,y) simd_max(x,y) + * simd::min(x,y) simd_min(x,y) + * simd::clamp(x,min,max) simd_clamp(x,min,max) + * simd::sign(x) simd_sign(x) + * simd::mix(x,y,t) simd_mix(x,y,t) + * simd::lerp(x,y,t) simd_lerp(x,y,t) + * simd::recip(x) simd_recip(x) + * simd::rsqrt(x) simd_rsqrt(x) + * simd::fract(x) simd_fract(x) + * simd::step(edge,x) simd_step(edge,x) + * simd::smoothstep(e0,e1,x) simd_smoothstep(e0,e1,x) + * simd::reduce_add(x) simd_reduce_add(x) + * simd::reduce_max(x) simd_reduce_max(x) + * simd::reduce_min(x) simd_reduce_min(x) + * simd::equal(x,y) simd_equal(x,y) + * + * simd::precise::recip(x) simd_precise_recip(x) + * simd::precise::rsqrt(x) simd_precise_rsqrt(x) + * + * simd::fast::recip(x) simd_fast_recip(x) + * simd::fast::rsqrt(x) simd_fast_rsqrt(x) + * + * @copyright 2014-2017 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_COMMON_HEADER +#define SIMD_COMMON_HEADER + +#include +#if SIMD_COMPILER_HAS_REQUIRED_FEATURES +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x); +/*! @abstract The elementwise absolute value of x. 
*/ +static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x); +/*! @abstract The elementwise absolute value of x. */ +static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x); +/*! @abstract The elementwise absolute value of x. + * @discussion Deprecated. Use simd_abs(x) instead. */ +#define vector_abs simd_abs + +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y); +/*! 
@abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y); +/*! @abstract The elementwise maximum of x and y. 
*/ +static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC float simd_max(float x, float y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC double simd_max(double x, double y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y); +/*! @abstract The elementwise maximum of x and y. */ +static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y); +/*! @abstract The elementwise maximum of x and y. + * @discussion Deprecated. Use simd_max(x,y) instead. */ +#define vector_max simd_max + +/*! @abstract The elementwise minimum of x and y. 
*/ +static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y); +/*! 
@abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC float simd_min(float x, float y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC double simd_min(double x, double y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y); +/*! @abstract The elementwise minimum of x and y. */ +static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y); +/*! @abstract The elementwise minimum of x and y. 
*/ +static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y); +/*! @abstract The elementwise minimum of x and y. + * @discussion Deprecated. Use simd_min(x,y) instead. */ +#define vector_min simd_min + + +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max); +/*! 
@abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short4 simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. 
*/ +static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC float simd_clamp(float x, float min, float max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. 
*/ +static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max); +/*! @abstract x clamped to the range [min, max]. 
+ * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC double simd_clamp(double x, double min, double max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Note that if you want to clamp all lanes to the same range, + * you can use a scalar value for min and max. */ +static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max); +/*! @abstract x clamped to the range [min, max]. + * @discussion Deprecated. Use simd_clamp(x,min,max) instead. */ +#define vector_clamp simd_clamp + +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC float simd_sign(float x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC double simd_sign(double x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. */ +static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x); +/*! @abstract -1 if x is negative, +1 if x is positive, and 0 otherwise. + * @discussion Deprecated. Use simd_sign(x) instead. */ +#define vector_sign simd_sign + +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC float simd_mix(float x, float y, float t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t); +/*! 
@abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC double simd_mix(double x, double y, double t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 */ +static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t); +/*! @abstract Linearly interpolates between x and y, taking the value x when + * t=0 and y when t=1 + * @discussion Deprecated. Use simd_mix(x, y, t) instead. */ +#define vector_mix simd_mix +#define simd_lerp simd_mix + +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC float simd_precise_recip(float x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x); +/*! 
@abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC double simd_precise_recip(double x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x); +/*! @abstract A good approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * a few units in the last place (ULPs). */ +static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x); +/*! @abstract A good approximation to 1/x. + * @discussion Deprecated. Use simd_precise_recip(x) instead. */ +#define vector_precise_recip simd_precise_recip + +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC float simd_fast_recip(float x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. 
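+ *
+ * Illustrative example (editor's addition, not part of the original
+ * header): the precise/fast pair trades accuracy for speed.
+ *
+ *   float approx  = simd_fast_recip(3.0f);    // 1/3 to at least 11 bits
+ *   float careful = simd_precise_recip(3.0f); // 1/3 to within a few ULPs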
*/ +static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC double simd_fast_recip(double x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x); +/*! @abstract A fast approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow; otherwise this function is accurate to + * at least 11 bits for float and 22 bits for double. */ +static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x); +/*! @abstract A fast approximation to 1/x. + * @discussion Deprecated. Use simd_fast_recip(x) instead. */ +#define vector_fast_recip simd_fast_recip + +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC float simd_recip(float x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x); +/*! @abstract An approximation to 1/x. 
+ * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC double simd_recip(double x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x); +/*! @abstract An approximation to 1/x. + * @discussion If x is very close to the limits of representation, the + * result may overflow or underflow. This function maps to + * simd_fast_recip(x) if -ffast-math is specified, and to + * simd_precise_recip(x) otherwise. */ +static inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x); +/*! @abstract An approximation to 1/x. + * @discussion Deprecated. Use simd_recip(x) instead. */ +#define vector_recip simd_recip + +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC float simd_precise_rsqrt(float x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x); +/*! @abstract A good approximation to 1/sqrt(x). 
+ * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC double simd_precise_rsqrt(double x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion This function is accurate to a few units in the last place + * (ULPs). */ +static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x); +/*! @abstract A good approximation to 1/sqrt(x). + * @discussion Deprecated. Use simd_precise_rsqrt(x) instead. */ +#define vector_precise_rsqrt simd_precise_rsqrt + +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC float simd_fast_rsqrt(float x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC double simd_fast_rsqrt(double x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. 
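+ *
+ * Illustrative example (editor's addition): a typical use of the fast
+ * variant is approximate normalization, where 11-bit accuracy often
+ * suffices. Assuming the simd_make_* constructors from
+ * <simd/vector_make.h>:
+ *
+ *   simd_float4 v = simd_make_float4(1.0f, 2.0f, 2.0f, 0.0f);
+ *   simd_float4 n = v * simd_fast_rsqrt(simd_reduce_add(v * v));
+ *   // |v| = 3, so n is approximately {1/3, 2/3, 2/3, 0}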
*/ +static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion This function is accurate to at least 11 bits for float and + * 22 bits for double. */ +static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x); +/*! @abstract A fast approximation to 1/sqrt(x). + * @discussion Deprecated. Use simd_fast_rsqrt(x) instead. */ +#define vector_fast_rsqrt simd_fast_rsqrt + +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC float simd_rsqrt(float x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC double simd_rsqrt(double x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion This function maps to simd_fast_rsqrt(x) if -ffast-math is + * specified, and to simd_precise_rsqrt(x) otherwise. */ +static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x); +/*! @abstract An approximation to 1/sqrt(x). + * @discussion Deprecated. Use simd_rsqrt(x) instead. */ +#define vector_rsqrt simd_rsqrt + +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC float simd_fract(float x); +/*!
@abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC double simd_fract(double x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion floor(x) + fract(x) is *approximately* equal to x. If x is + * positive and finite, then the two values are exactly equal. */ +static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x); +/*! @abstract The "fractional part" of x, lying in the range [0, 1). + * @discussion Deprecated. Use simd_fract(x) instead. */ +#define vector_fract simd_fract + +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC float simd_step(float edge, float x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. 
*/ +static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC double simd_step(double edge, double x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Use a scalar value for edge if you want to apply the same + * threshold to all lanes. */ +static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x); +/*! @abstract 0 if x < edge, and 1 otherwise. + * @discussion Deprecated. Use simd_step(edge, x) instead. */ +#define vector_step simd_step + +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. 
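+ *
+ * Illustrative example (editor's addition): a smooth crossfade built
+ * from simd_smoothstep and simd_mix.
+ *
+ *   float t = simd_smoothstep(0.0f, 1.0f, 0.5f); // 0.5, with zero
+ *                                                // slope at both edges
+ *   float c = simd_mix(10.0f, 20.0f, t);         // 15.0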
*/ +static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion You can use a scalar value for edge0 and edge1 if you want + * to clamp all lanes at the same points. */ +static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x); +/*! @abstract Interpolates smoothly between 0 at edge0 and 1 at edge1 + * @discussion Deprecated. Use simd_smoothstep(edge0, edge1, x) instead. */ +#define vector_smoothstep simd_smoothstep + +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x); +/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing.
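+ *
+ * Illustrative example (editor's addition): sixteen chars can easily
+ * overflow a char accumulator, so widen first (simd_short is the
+ * lanewise conversion from <simd/conversion.h>, simd_make_char16 the
+ * constructor from <simd/vector_make.h>):
+ *
+ *   simd_char16 bytes = simd_make_char16(100, 100, 100, 100, 100, 100,
+ *                                        100, 100, 100, 100, 100, 100,
+ *                                        100, 100, 100, 100);
+ *   short sum = simd_reduce_add(simd_short(bytes)); // 1600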
*/ +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x); +/*! @abstract Sum of elements in x.
+ * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing.
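+ *
+ * Illustrative example (editor's addition): floating-point reductions
+ * compose naturally with lanewise arithmetic, e.g. a dot product:
+ *
+ *   simd_double2 a = simd_make_double2(1.0, 2.0);
+ *   simd_double2 b = simd_make_double2(3.0, 4.0);
+ *   double dot = simd_reduce_add(a * b); // 1*3 + 2*4 = 11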
*/ +static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x); +/*! @abstract Sum of elements in x. + * @discussion This computation may overflow; especially for 8-bit types you + * may need to convert to a wider type before reducing. */ +static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x); +/*! @abstract Sum of elements in x. + * @discussion Deprecated. Use simd_reduce_add(x) instead. */ +#define vector_reduce_add simd_reduce_add + +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x); +/*! @abstract Minimum of elements in x.
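+ *
+ * Illustrative example (editor's addition, using the simd_make_*
+ * constructors from <simd/vector_make.h>):
+ *
+ *   simd_ushort8 v = simd_make_ushort8(9, 4, 7, 1, 8, 3, 6, 2);
+ *   unsigned short lo = simd_reduce_min(v); // 1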
*/ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x); +/*! @abstract Minimum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x); +/*! @abstract Minimum of elements in x. + * @discussion Deprecated. Use simd_reduce_min(x) instead. */ +#define vector_reduce_min simd_reduce_min + +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x); +/*!
@abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x); +/*! @abstract Maximum of elements in x. 
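+ *
+ * Illustrative example (editor's addition, using the simd_make_*
+ * constructors from <simd/vector_make.h>):
+ *
+ *   simd_uint3 v = simd_make_uint3(5, 42, 17);
+ *   unsigned int hi = simd_reduce_max(v); // 42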
*/ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x); +/*! @abstract Maximum of elements in x. */ +static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x); +/*! @abstract Maximum of elements in x. + * @discussion Deprecated. Use simd_reduce_max(x) instead. */ +#define vector_reduce_max simd_reduce_max + +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char2 x, simd_char2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char3 x, simd_char3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char4 x, simd_char4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char8 x, simd_char8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char16 x, simd_char16 y) { + return simd_all(x == y); +} +/*!
@abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char32 x, simd_char32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_char64 x, simd_char64 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar2 x, simd_uchar2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar3 x, simd_uchar3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar4 x, simd_uchar4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar8 x, simd_uchar8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar16 x, simd_uchar16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar32 x, simd_uchar32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uchar64 x, simd_uchar64 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short2 x, simd_short2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short3 x, simd_short3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short4 x, simd_short4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short8 x, simd_short8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short16 x, simd_short16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_short32 x, simd_short32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort2 x, simd_ushort2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort3 x, simd_ushort3 y) { + return simd_all(x == y); +} +/*! 
@abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort4 x, simd_ushort4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort8 x, simd_ushort8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort16 x, simd_ushort16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_ushort32 x, simd_ushort32 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int2 x, simd_int2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int3 x, simd_int3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int4 x, simd_int4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int8 x, simd_int8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_int16 x, simd_int16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint2 x, simd_uint2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint3 x, simd_uint3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint4 x, simd_uint4 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint8 x, simd_uint8 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_uint16 x, simd_uint16 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float2 x, simd_float2 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float3 x, simd_float3 y) { + return simd_all(x == y); +} +/*! @abstract True if and only if each lane of x is equal to the + * corresponding lane of y. */ +static inline SIMD_CFUNC simd_bool simd_equal(simd_float4 x, simd_float4 y) { + return simd_all(x == y); +} +/*! 
@abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_float8 x, simd_float8 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_float16 x, simd_float16 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_long2 x, simd_long2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_long3 x, simd_long3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_long4 x, simd_long4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_long8 x, simd_long8 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong2 x, simd_ulong2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong3 x, simd_ulong3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong4 x, simd_ulong4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_ulong8 x, simd_ulong8 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double2 x, simd_double2 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double3 x, simd_double3 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double4 x, simd_double4 y) {
+  return simd_all(x == y);
+}
+/*! @abstract True if and only if each lane of x is equal to the
+ * corresponding lane of y. */
+static inline SIMD_CFUNC simd_bool simd_equal(simd_double8 x, simd_double8 y) {
+  return simd_all(x == y);
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+namespace simd {
+  /*! @abstract The lanewise absolute value of x. */
+  template <typename typeN> static SIMD_CPPFUNC typeN abs(const typeN x) { return ::simd_abs(x); }
+  /*! @abstract The lanewise maximum of x and y. */
+  template <typename typeN> static SIMD_CPPFUNC typeN max(const typeN x, const typeN y) { return ::simd_max(x,y); }
+  /*! @abstract The lanewise minimum of x and y. */
+  template <typename typeN> static SIMD_CPPFUNC typeN min(const typeN x, const typeN y) { return ::simd_min(x,y); }
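+  /* Editor's note: a brief usage sketch (illustrative only, not part of the
+   * original header). These wrappers simply forward to the C entry points,
+   * so one spelling covers every vector width and element type:
+   *
+   *   simd_float4 v = simd_make_float4(-3.0f, 0.5f, 2.0f, 9.0f);
+   *   simd_float4 a = simd::abs(v);     // lanewise |v|
+   *   simd_float4 m = simd::min(v, a);  // lanewise minimum
+   */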
+  /*! @abstract x clamped to the interval [min, max]. */
+  template <typename typeN> static SIMD_CPPFUNC typeN clamp(const typeN x, const typeN min, const typeN max) { return ::simd_clamp(x,min,max); }
+  /*! @abstract -1 if x < 0, +1 if x > 0, and 0 otherwise. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN sign(const fptypeN x) { return ::simd_sign(x); }
+  /*! @abstract Linearly interpolates between x and y, taking the value x when t=0 and y when t=1. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN mix(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN lerp(const fptypeN x, const fptypeN y, const fptypeN t) { return ::simd_mix(x,y,t); }
+  /*! @abstract An approximation to 1/x. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_recip(x); }
+  /*! @abstract An approximation to 1/sqrt(x). */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_rsqrt(x); }
+  /*! @abstract The "fractional part" of x, in the range [0,1). */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN fract(const fptypeN x) { return ::simd_fract(x); }
+  /*! @abstract 0 if x < edge, 1 otherwise. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN step(const fptypeN edge, const fptypeN x) { return ::simd_step(edge,x); }
+  /*! @abstract Smoothly interpolates from 0 at edge0 to 1 at edge1. */
+  template <typename fptypeN> static SIMD_CPPFUNC fptypeN smoothstep(const fptypeN edge0, const fptypeN edge1, const fptypeN x) { return ::simd_smoothstep(edge0,edge1,x); }
+  /*! @abstract True if and only if each lane of x is equal to the
+   * corresponding lane of y.
+   *
+   * @discussion This isn't operator== because that's already defined by
+   * the compiler to return a lane mask. */
+  template <typename fptypeN> static SIMD_CPPFUNC simd_bool equal(const fptypeN x, const fptypeN y) { return ::simd_equal(x, y); }
+#if __cpp_decltype_auto
+  /* If you are targeting an earlier version of the C++ standard that lacks
+   decltype_auto support, you may use the C-style simd_reduce_* functions
+   instead. */
+  /*! @abstract The sum of the elements in x. May overflow. */
+  template <typename typeN> static SIMD_CPPFUNC auto reduce_add(typeN x) { return ::simd_reduce_add(x); }
+  /*! @abstract The least element in x. */
+  template <typename typeN> static SIMD_CPPFUNC auto reduce_min(typeN x) { return ::simd_reduce_min(x); }
+  /*! @abstract The greatest element in x. */
+  template <typename typeN> static SIMD_CPPFUNC auto reduce_max(typeN x) { return ::simd_reduce_max(x); }
+#endif
+  namespace precise {
+    /*! @abstract An approximation to 1/x. */
+    template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_precise_recip(x); }
+    /*! @abstract An approximation to 1/sqrt(x). */
+    template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_precise_rsqrt(x); }
+  }
+  namespace fast {
+    /*! @abstract An approximation to 1/x. */
+    template <typename fptypeN> static SIMD_CPPFUNC fptypeN recip(const fptypeN x) { return ::simd_fast_recip(x); }
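+    /* Editor's note: an illustrative contrast of the two namespaces, not
+     * part of the original header. simd::fast uses the hardware estimate
+     * instructions where available; simd::precise refines that estimate
+     * with a Newton-Raphson step (see the implementations further below).
+     * Under -ffast-math the unsuffixed simd::recip dispatches to the fast
+     * variant, otherwise to the precise one:
+     *
+     *   simd_float4 x = simd_make_float4(2.0f, 4.0f, 8.0f, 16.0f);
+     *   simd_float4 approx  = simd::fast::recip(x);     // low-precision estimate
+     *   simd_float4 refined = simd::precise::recip(x);  // ~full float precision
+     */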
+    /*! @abstract An approximation to 1/sqrt(x). */
+    template <typename fptypeN> static SIMD_CPPFUNC fptypeN rsqrt(const fptypeN x) { return ::simd_fast_rsqrt(x); }
+  }
+}
+
+extern "C" {
+#endif /* __cplusplus */
+
+#pragma mark - Implementation
+
+static inline SIMD_CFUNC simd_char2 simd_abs(simd_char2 x) {
+  return simd_make_char2(simd_abs(simd_make_char8_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_char3 simd_abs(simd_char3 x) {
+  return simd_make_char3(simd_abs(simd_make_char8_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_char4 simd_abs(simd_char4 x) {
+  return simd_make_char4(simd_abs(simd_make_char8_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_char8 simd_abs(simd_char8 x) {
+#if defined __arm__ || defined __arm64__
+  return vabs_s8(x);
+#else
+  return simd_make_char8(simd_abs(simd_make_char16_undef(x)));
+#endif
+}
+
+static inline SIMD_CFUNC simd_char16 simd_abs(simd_char16 x) {
+#if defined __arm__ || defined __arm64__
+  return vabsq_s8(x);
+#elif defined __SSE4_1__
+  return (simd_char16) _mm_abs_epi8((__m128i)x);
+#else
+  simd_char16 mask = x >> 7;
+  return (x ^ mask) - mask;
+#endif
+}
+
+static inline SIMD_CFUNC simd_char32 simd_abs(simd_char32 x) {
+#if defined __AVX2__
+  return _mm256_abs_epi8(x);
+#else
+  return simd_make_char32(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_char64 simd_abs(simd_char64 x) {
+#if defined __AVX512BW__
+  return _mm512_abs_epi8(x);
+#else
+  return simd_make_char64(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_short2 simd_abs(simd_short2 x) {
+  return simd_make_short2(simd_abs(simd_make_short4_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_short3 simd_abs(simd_short3 x) {
+  return simd_make_short3(simd_abs(simd_make_short4_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_short4 simd_abs(simd_short4 x) {
+#if defined __arm__ || defined __arm64__
+  return vabs_s16(x);
+#else
+  return simd_make_short4(simd_abs(simd_make_short8_undef(x)));
+#endif
+}
+
+static inline SIMD_CFUNC simd_short8 simd_abs(simd_short8 x) {
+#if defined __arm__ || defined __arm64__
+  return vabsq_s16(x);
+#elif defined __SSE4_1__
+  return (simd_short8) _mm_abs_epi16((__m128i)x);
+#else
+  simd_short8 mask = x >> 15;
+  return (x ^ mask) - mask;
+#endif
+}
+
+static inline SIMD_CFUNC simd_short16 simd_abs(simd_short16 x) {
+#if defined __AVX2__
+  return _mm256_abs_epi16(x);
+#else
+  return simd_make_short16(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_short32 simd_abs(simd_short32 x) {
+#if defined __AVX512BW__
+  return _mm512_abs_epi16(x);
+#else
+  return simd_make_short32(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_int2 simd_abs(simd_int2 x) {
+#if defined __arm__ || defined __arm64__
+  return vabs_s32(x);
+#else
+  return simd_make_int2(simd_abs(simd_make_int4_undef(x)));
+#endif
+}
+
+static inline SIMD_CFUNC simd_int3 simd_abs(simd_int3 x) {
+  return simd_make_int3(simd_abs(simd_make_int4_undef(x)));
+}
+
+static inline SIMD_CFUNC simd_int4 simd_abs(simd_int4 x) {
+#if defined __arm__ || defined __arm64__
+  return vabsq_s32(x);
+#elif defined __SSE4_1__
+  return (simd_int4) _mm_abs_epi32((__m128i)x);
+#else
+  simd_int4 mask = x >> 31;
+  return (x ^ mask) - mask;
+#endif
+}
+
+static inline SIMD_CFUNC simd_int8 simd_abs(simd_int8 x) {
+#if defined __AVX2__
+  return _mm256_abs_epi32(x);
+#else
+  return simd_make_int8(simd_abs(x.lo), simd_abs(x.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_int16 simd_abs(simd_int16 x) {
+#if defined __AVX512F__
+  return _mm512_abs_epi32(x);
+#else
+  return 
simd_make_int16(simd_abs(x.lo), simd_abs(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_abs(simd_float2 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_float3 simd_abs(simd_float3 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_float4 simd_abs(simd_float4 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_float8 simd_abs(simd_float8 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_float16 simd_abs(simd_float16 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_long2 simd_abs(simd_long2 x) { +#if defined __arm64__ + return vabsq_s64(x); +#elif defined __AVX512VL__ + return (simd_long2) _mm_abs_epi64((__m128i)x); +#else + simd_long2 mask = x >> 63; return (x ^ mask) - mask; +#endif +} + +static inline SIMD_CFUNC simd_long3 simd_abs(simd_long3 x) { + return simd_make_long3(simd_abs(simd_make_long4_undef(x))); +} + +static inline SIMD_CFUNC simd_long4 simd_abs(simd_long4 x) { +#if defined __AVX512VL__ + return _mm256_abs_epi64(x); +#else + return simd_make_long4(simd_abs(x.lo), simd_abs(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_long8 simd_abs(simd_long8 x) { +#if defined __AVX512F__ + return _mm512_abs_epi64(x); +#else + return simd_make_long8(simd_abs(x.lo), simd_abs(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_double2 simd_abs(simd_double2 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_double3 simd_abs(simd_double3 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_double4 simd_abs(simd_double4 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_double8 simd_abs(simd_double8 x) { + return __tg_fabs(x); +} + +static inline SIMD_CFUNC simd_char2 simd_min(simd_char2 x, simd_char2 y) { + return simd_make_char2(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char3 simd_min(simd_char3 x, simd_char3 y) { + return simd_make_char3(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char4 simd_min(simd_char4 x, simd_char4 y) { + return simd_make_char4(simd_min(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char8 simd_min(simd_char8 x, simd_char8 y) { +#if defined __arm__ || defined __arm64__ + return vmin_s8(x, y); +#else + return simd_make_char8(simd_min(simd_make_char16_undef(x), simd_make_char16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_char16 simd_min(simd_char16 x, simd_char16 y) { +#if defined __arm__ || defined __arm64__ + return vminq_s8(x, y); +#elif defined __SSE4_1__ + return (simd_char16) _mm_min_epi8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_char32 simd_min(simd_char32 x, simd_char32 y) { +#if defined __AVX2__ + return _mm256_min_epi8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_char64 simd_min(simd_char64 x, simd_char64 y) { +#if defined __AVX512BW__ + return _mm512_min_epi8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uchar2 simd_min(simd_uchar2 x, simd_uchar2 y) { + return simd_make_uchar2(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar3 simd_min(simd_uchar3 x, simd_uchar3 y) { + return simd_make_uchar3(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar4 simd_min(simd_uchar4 x, simd_uchar4 y) { + 
return simd_make_uchar4(simd_min(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar8 simd_min(simd_uchar8 x, simd_uchar8 y) { +#if defined __arm__ || defined __arm64__ + return vmin_u8(x, y); +#else + return simd_make_uchar8(simd_min(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uchar16 simd_min(simd_uchar16 x, simd_uchar16 y) { +#if defined __arm__ || defined __arm64__ + return vminq_u8(x, y); +#elif defined __SSE4_1__ + return (simd_uchar16) _mm_min_epu8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uchar32 simd_min(simd_uchar32 x, simd_uchar32 y) { +#if defined __AVX2__ + return _mm256_min_epu8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uchar64 simd_min(simd_uchar64 x, simd_uchar64 y) { +#if defined __AVX512BW__ + return _mm512_min_epu8(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_short2 simd_min(simd_short2 x, simd_short2 y) { + return simd_make_short2(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short3 simd_min(simd_short3 x, simd_short3 y) { + return simd_make_short3(simd_min(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short4 simd_min(simd_short4 x, simd_short4 y) { +#if defined __arm__ || defined __arm64__ + return vmin_s16(x, y); +#else + return simd_make_short4(simd_min(simd_make_short8_undef(x), simd_make_short8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_short8 simd_min(simd_short8 x, simd_short8 y) { +#if defined __arm__ || defined __arm64__ + return vminq_s16(x, y); +#elif defined __SSE4_1__ + return (simd_short8) _mm_min_epi16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_short16 simd_min(simd_short16 x, simd_short16 y) { +#if defined __AVX2__ + return _mm256_min_epi16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_short32 simd_min(simd_short32 x, simd_short32 y) { +#if defined __AVX512BW__ + return _mm512_min_epi16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ushort2 simd_min(simd_ushort2 x, simd_ushort2 y) { + return simd_make_ushort2(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort3 simd_min(simd_ushort3 x, simd_ushort3 y) { + return simd_make_ushort3(simd_min(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort4 simd_min(simd_ushort4 x, simd_ushort4 y) { +#if defined __arm__ || defined __arm64__ + return vmin_u16(x, y); +#else + return simd_make_ushort4(simd_min(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_ushort8 simd_min(simd_ushort8 x, simd_ushort8 y) { +#if defined __arm__ || defined __arm64__ + return vminq_u16(x, y); +#elif defined __SSE4_1__ + return (simd_ushort8) _mm_min_epu16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ushort16 simd_min(simd_ushort16 x, simd_ushort16 y) { +#if defined __AVX2__ + return _mm256_min_epu16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ushort32 simd_min(simd_ushort32 x, simd_ushort32 y) { +#if 
defined __AVX512BW__ + return _mm512_min_epu16(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_int2 simd_min(simd_int2 x, simd_int2 y) { +#if defined __arm__ || defined __arm64__ + return vmin_s32(x, y); +#else + return simd_make_int2(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_int3 simd_min(simd_int3 x, simd_int3 y) { + return simd_make_int3(simd_min(simd_make_int4_undef(x), simd_make_int4_undef(y))); +} + +static inline SIMD_CFUNC simd_int4 simd_min(simd_int4 x, simd_int4 y) { +#if defined __arm__ || defined __arm64__ + return vminq_s32(x, y); +#elif defined __SSE4_1__ + return (simd_int4) _mm_min_epi32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_int8 simd_min(simd_int8 x, simd_int8 y) { +#if defined __AVX2__ + return _mm256_min_epi32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_int16 simd_min(simd_int16 x, simd_int16 y) { +#if defined __AVX512F__ + return _mm512_min_epi32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uint2 simd_min(simd_uint2 x, simd_uint2 y) { +#if defined __arm__ || defined __arm64__ + return vmin_u32(x, y); +#else + return simd_make_uint2(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uint3 simd_min(simd_uint3 x, simd_uint3 y) { + return simd_make_uint3(simd_min(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +} + +static inline SIMD_CFUNC simd_uint4 simd_min(simd_uint4 x, simd_uint4 y) { +#if defined __arm__ || defined __arm64__ + return vminq_u32(x, y); +#elif defined __SSE4_1__ + return (simd_uint4) _mm_min_epu32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uint8 simd_min(simd_uint8 x, simd_uint8 y) { +#if defined __AVX2__ + return _mm256_min_epu32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_uint16 simd_min(simd_uint16 x, simd_uint16 y) { +#if defined __AVX512F__ + return _mm512_min_epu32(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC float simd_min(float x, float y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float2 simd_min(simd_float2 x, simd_float2 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float3 simd_min(simd_float3 x, simd_float3 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float4 simd_min(simd_float4 x, simd_float4 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float8 simd_min(simd_float8 x, simd_float8 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_float16 simd_min(simd_float16 x, simd_float16 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_long2 simd_min(simd_long2 x, simd_long2 y) { +#if defined __AVX512VL__ + return _mm_min_epi64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_long3 simd_min(simd_long3 x, simd_long3 y) { + return simd_make_long3(simd_min(simd_make_long4_undef(x), simd_make_long4_undef(y))); +} + +static inline SIMD_CFUNC simd_long4 simd_min(simd_long4 x, simd_long4 y) { +#if defined __AVX512VL__ + return _mm256_min_epi64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_long8 simd_min(simd_long8 x, simd_long8 y) { +#if 
defined __AVX512F__ + return _mm512_min_epi64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ulong2 simd_min(simd_ulong2 x, simd_ulong2 y) { +#if defined __AVX512VL__ + return _mm_min_epu64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ulong3 simd_min(simd_ulong3 x, simd_ulong3 y) { + return simd_make_ulong3(simd_min(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y))); +} + +static inline SIMD_CFUNC simd_ulong4 simd_min(simd_ulong4 x, simd_ulong4 y) { +#if defined __AVX512VL__ + return _mm256_min_epu64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC simd_ulong8 simd_min(simd_ulong8 x, simd_ulong8 y) { +#if defined __AVX512F__ + return _mm512_min_epu64(x, y); +#else + return simd_bitselect(x, y, y < x); +#endif +} + +static inline SIMD_CFUNC double simd_min(double x, double y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double2 simd_min(simd_double2 x, simd_double2 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double3 simd_min(simd_double3 x, simd_double3 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double4 simd_min(simd_double4 x, simd_double4 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_double8 simd_min(simd_double8 x, simd_double8 y) { + return __tg_fmin(x,y); +} + +static inline SIMD_CFUNC simd_char2 simd_max(simd_char2 x, simd_char2 y) { + return simd_make_char2(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char3 simd_max(simd_char3 x, simd_char3 y) { + return simd_make_char3(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char4 simd_max(simd_char4 x, simd_char4 y) { + return simd_make_char4(simd_max(simd_make_char8_undef(x), simd_make_char8_undef(y))); +} + +static inline SIMD_CFUNC simd_char8 simd_max(simd_char8 x, simd_char8 y) { +#if defined __arm__ || defined __arm64__ + return vmax_s8(x, y); +#else + return simd_make_char8(simd_max(simd_make_char16_undef(x), simd_make_char16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_char16 simd_max(simd_char16 x, simd_char16 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_s8(x, y); +#elif defined __SSE4_1__ + return (simd_char16) _mm_max_epi8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_char32 simd_max(simd_char32 x, simd_char32 y) { +#if defined __AVX2__ + return _mm256_max_epi8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_char64 simd_max(simd_char64 x, simd_char64 y) { +#if defined __AVX512BW__ + return _mm512_max_epi8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uchar2 simd_max(simd_uchar2 x, simd_uchar2 y) { + return simd_make_uchar2(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar3 simd_max(simd_uchar3 x, simd_uchar3 y) { + return simd_make_uchar3(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar4 simd_max(simd_uchar4 x, simd_uchar4 y) { + return simd_make_uchar4(simd_max(simd_make_uchar8_undef(x), simd_make_uchar8_undef(y))); +} + +static inline SIMD_CFUNC simd_uchar8 simd_max(simd_uchar8 x, simd_uchar8 y) { +#if defined __arm__ || defined __arm64__ + return vmax_u8(x, y); +#else + return 
simd_make_uchar8(simd_max(simd_make_uchar16_undef(x), simd_make_uchar16_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uchar16 simd_max(simd_uchar16 x, simd_uchar16 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_u8(x, y); +#elif defined __SSE4_1__ + return (simd_uchar16) _mm_max_epu8((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uchar32 simd_max(simd_uchar32 x, simd_uchar32 y) { +#if defined __AVX2__ + return _mm256_max_epu8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uchar64 simd_max(simd_uchar64 x, simd_uchar64 y) { +#if defined __AVX512BW__ + return _mm512_max_epu8(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_short2 simd_max(simd_short2 x, simd_short2 y) { + return simd_make_short2(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short3 simd_max(simd_short3 x, simd_short3 y) { + return simd_make_short3(simd_max(simd_make_short4_undef(x), simd_make_short4_undef(y))); +} + +static inline SIMD_CFUNC simd_short4 simd_max(simd_short4 x, simd_short4 y) { +#if defined __arm__ || defined __arm64__ + return vmax_s16(x, y); +#else + return simd_make_short4(simd_max(simd_make_short8_undef(x), simd_make_short8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_short8 simd_max(simd_short8 x, simd_short8 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_s16(x, y); +#elif defined __SSE4_1__ + return (simd_short8) _mm_max_epi16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_short16 simd_max(simd_short16 x, simd_short16 y) { +#if defined __AVX2__ + return _mm256_max_epi16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_short32 simd_max(simd_short32 x, simd_short32 y) { +#if defined __AVX512BW__ + return _mm512_max_epi16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ushort2 simd_max(simd_ushort2 x, simd_ushort2 y) { + return simd_make_ushort2(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort3 simd_max(simd_ushort3 x, simd_ushort3 y) { + return simd_make_ushort3(simd_max(simd_make_ushort4_undef(x), simd_make_ushort4_undef(y))); +} + +static inline SIMD_CFUNC simd_ushort4 simd_max(simd_ushort4 x, simd_ushort4 y) { +#if defined __arm__ || defined __arm64__ + return vmax_u16(x, y); +#else + return simd_make_ushort4(simd_max(simd_make_ushort8_undef(x), simd_make_ushort8_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_ushort8 simd_max(simd_ushort8 x, simd_ushort8 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_u16(x, y); +#elif defined __SSE4_1__ + return (simd_ushort8) _mm_max_epu16((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ushort16 simd_max(simd_ushort16 x, simd_ushort16 y) { +#if defined __AVX2__ + return _mm256_max_epu16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ushort32 simd_max(simd_ushort32 x, simd_ushort32 y) { +#if defined __AVX512BW__ + return _mm512_max_epu16(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_int2 simd_max(simd_int2 x, simd_int2 y) { +#if defined __arm__ || defined __arm64__ + return vmax_s32(x, y); +#else + 
return simd_make_int2(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_int3 simd_max(simd_int3 x, simd_int3 y) { + return simd_make_int3(simd_max(simd_make_int4_undef(x), simd_make_int4_undef(y))); +} + +static inline SIMD_CFUNC simd_int4 simd_max(simd_int4 x, simd_int4 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_s32(x, y); +#elif defined __SSE4_1__ + return (simd_int4) _mm_max_epi32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_int8 simd_max(simd_int8 x, simd_int8 y) { +#if defined __AVX2__ + return _mm256_max_epi32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_int16 simd_max(simd_int16 x, simd_int16 y) { +#if defined __AVX512F__ + return _mm512_max_epi32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uint2 simd_max(simd_uint2 x, simd_uint2 y) { +#if defined __arm__ || defined __arm64__ + return vmax_u32(x, y); +#else + return simd_make_uint2(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +#endif + +} + +static inline SIMD_CFUNC simd_uint3 simd_max(simd_uint3 x, simd_uint3 y) { + return simd_make_uint3(simd_max(simd_make_uint4_undef(x), simd_make_uint4_undef(y))); +} + +static inline SIMD_CFUNC simd_uint4 simd_max(simd_uint4 x, simd_uint4 y) { +#if defined __arm__ || defined __arm64__ + return vmaxq_u32(x, y); +#elif defined __SSE4_1__ + return (simd_uint4) _mm_max_epu32((__m128i)x, (__m128i)y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uint8 simd_max(simd_uint8 x, simd_uint8 y) { +#if defined __AVX2__ + return _mm256_max_epu32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_uint16 simd_max(simd_uint16 x, simd_uint16 y) { +#if defined __AVX512F__ + return _mm512_max_epu32(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC float simd_max(float x, float y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float2 simd_max(simd_float2 x, simd_float2 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float3 simd_max(simd_float3 x, simd_float3 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float4 simd_max(simd_float4 x, simd_float4 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float8 simd_max(simd_float8 x, simd_float8 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_float16 simd_max(simd_float16 x, simd_float16 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_long2 simd_max(simd_long2 x, simd_long2 y) { +#if defined __AVX512VL__ + return _mm_max_epi64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_long3 simd_max(simd_long3 x, simd_long3 y) { + return simd_make_long3(simd_max(simd_make_long4_undef(x), simd_make_long4_undef(y))); +} + +static inline SIMD_CFUNC simd_long4 simd_max(simd_long4 x, simd_long4 y) { +#if defined __AVX512VL__ + return _mm256_max_epi64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_long8 simd_max(simd_long8 x, simd_long8 y) { +#if defined __AVX512F__ + return _mm512_max_epi64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ulong2 simd_max(simd_ulong2 x, simd_ulong2 y) { +#if defined __AVX512VL__ + return _mm_max_epu64(x, y); +#else + return 
simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ulong3 simd_max(simd_ulong3 x, simd_ulong3 y) { + return simd_make_ulong3(simd_max(simd_make_ulong4_undef(x), simd_make_ulong4_undef(y))); +} + +static inline SIMD_CFUNC simd_ulong4 simd_max(simd_ulong4 x, simd_ulong4 y) { +#if defined __AVX512VL__ + return _mm256_max_epu64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC simd_ulong8 simd_max(simd_ulong8 x, simd_ulong8 y) { +#if defined __AVX512F__ + return _mm512_max_epu64(x, y); +#else + return simd_bitselect(x, y, x < y); +#endif +} + +static inline SIMD_CFUNC double simd_max(double x, double y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double2 simd_max(simd_double2 x, simd_double2 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double3 simd_max(simd_double3 x, simd_double3 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double4 simd_max(simd_double4 x, simd_double4 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_double8 simd_max(simd_double8 x, simd_double8 y) { + return __tg_fmax(x,y); +} + +static inline SIMD_CFUNC simd_char2 simd_clamp(simd_char2 x, simd_char2 min, simd_char2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char3 simd_clamp(simd_char3 x, simd_char3 min, simd_char3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char4 simd_clamp(simd_char4 x, simd_char4 min, simd_char4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char8 simd_clamp(simd_char8 x, simd_char8 min, simd_char8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char16 simd_clamp(simd_char16 x, simd_char16 min, simd_char16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char32 simd_clamp(simd_char32 x, simd_char32 min, simd_char32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_char64 simd_clamp(simd_char64 x, simd_char64 min, simd_char64 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar2 simd_clamp(simd_uchar2 x, simd_uchar2 min, simd_uchar2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar3 simd_clamp(simd_uchar3 x, simd_uchar3 min, simd_uchar3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar4 simd_clamp(simd_uchar4 x, simd_uchar4 min, simd_uchar4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar8 simd_clamp(simd_uchar8 x, simd_uchar8 min, simd_uchar8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar16 simd_clamp(simd_uchar16 x, simd_uchar16 min, simd_uchar16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar32 simd_clamp(simd_uchar32 x, simd_uchar32 min, simd_uchar32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uchar64 simd_clamp(simd_uchar64 x, simd_uchar64 min, simd_uchar64 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short2 simd_clamp(simd_short2 x, simd_short2 min, simd_short2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short3 simd_clamp(simd_short3 x, simd_short3 min, simd_short3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short4 
simd_clamp(simd_short4 x, simd_short4 min, simd_short4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short8 simd_clamp(simd_short8 x, simd_short8 min, simd_short8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short16 simd_clamp(simd_short16 x, simd_short16 min, simd_short16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_short32 simd_clamp(simd_short32 x, simd_short32 min, simd_short32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort2 simd_clamp(simd_ushort2 x, simd_ushort2 min, simd_ushort2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort3 simd_clamp(simd_ushort3 x, simd_ushort3 min, simd_ushort3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort4 simd_clamp(simd_ushort4 x, simd_ushort4 min, simd_ushort4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort8 simd_clamp(simd_ushort8 x, simd_ushort8 min, simd_ushort8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort16 simd_clamp(simd_ushort16 x, simd_ushort16 min, simd_ushort16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ushort32 simd_clamp(simd_ushort32 x, simd_ushort32 min, simd_ushort32 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int2 simd_clamp(simd_int2 x, simd_int2 min, simd_int2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int3 simd_clamp(simd_int3 x, simd_int3 min, simd_int3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int4 simd_clamp(simd_int4 x, simd_int4 min, simd_int4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int8 simd_clamp(simd_int8 x, simd_int8 min, simd_int8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_int16 simd_clamp(simd_int16 x, simd_int16 min, simd_int16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint2 simd_clamp(simd_uint2 x, simd_uint2 min, simd_uint2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint3 simd_clamp(simd_uint3 x, simd_uint3 min, simd_uint3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint4 simd_clamp(simd_uint4 x, simd_uint4 min, simd_uint4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint8 simd_clamp(simd_uint8 x, simd_uint8 min, simd_uint8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_uint16 simd_clamp(simd_uint16 x, simd_uint16 min, simd_uint16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC float simd_clamp(float x, float min, float max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float2 simd_clamp(simd_float2 x, simd_float2 min, simd_float2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float3 simd_clamp(simd_float3 x, simd_float3 min, simd_float3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float4 simd_clamp(simd_float4 x, simd_float4 min, simd_float4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float8 simd_clamp(simd_float8 x, simd_float8 min, simd_float8 max) { 
+ return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_float16 simd_clamp(simd_float16 x, simd_float16 min, simd_float16 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long2 simd_clamp(simd_long2 x, simd_long2 min, simd_long2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long3 simd_clamp(simd_long3 x, simd_long3 min, simd_long3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long4 simd_clamp(simd_long4 x, simd_long4 min, simd_long4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_long8 simd_clamp(simd_long8 x, simd_long8 min, simd_long8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong2 simd_clamp(simd_ulong2 x, simd_ulong2 min, simd_ulong2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong3 simd_clamp(simd_ulong3 x, simd_ulong3 min, simd_ulong3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong4 simd_clamp(simd_ulong4 x, simd_ulong4 min, simd_ulong4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_ulong8 simd_clamp(simd_ulong8 x, simd_ulong8 min, simd_ulong8 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC double simd_clamp(double x, double min, double max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double2 simd_clamp(simd_double2 x, simd_double2 min, simd_double2 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double3 simd_clamp(simd_double3 x, simd_double3 min, simd_double3 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double4 simd_clamp(simd_double4 x, simd_double4 min, simd_double4 max) { + return simd_min(simd_max(x, min), max); +} + +static inline SIMD_CFUNC simd_double8 simd_clamp(simd_double8 x, simd_double8 min, simd_double8 max) { + return simd_min(simd_max(x, min), max); +} + + +static inline SIMD_CFUNC float simd_sign(float x) { + return (x == 0 | x != x) ? 0 : copysign(1,x); +} + +static inline SIMD_CFUNC simd_float2 simd_sign(simd_float2 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float3 simd_sign(simd_float3 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float4 simd_sign(simd_float4 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float8 simd_sign(simd_float8 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_float16 simd_sign(simd_float16 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC double simd_sign(double x) { + return (x == 0 | x != x) ? 
0 : copysign(1,x); +} + +static inline SIMD_CFUNC simd_double2 simd_sign(simd_double2 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_double3 simd_sign(simd_double3 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_double4 simd_sign(simd_double4 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC simd_double8 simd_sign(simd_double8 x) { + return simd_bitselect(__tg_copysign(1,x), 0, x == 0 | x != x); +} + +static inline SIMD_CFUNC float simd_mix(float x, float y, float t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float2 simd_mix(simd_float2 x, simd_float2 y, simd_float2 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float3 simd_mix(simd_float3 x, simd_float3 y, simd_float3 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float4 simd_mix(simd_float4 x, simd_float4 y, simd_float4 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float8 simd_mix(simd_float8 x, simd_float8 y, simd_float8 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_float16 simd_mix(simd_float16 x, simd_float16 y, simd_float16 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC double simd_mix(double x, double y, double t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double2 simd_mix(simd_double2 x, simd_double2 y, simd_double2 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double3 simd_mix(simd_double3 x, simd_double3 y, simd_double3 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double4 simd_mix(simd_double4 x, simd_double4 y, simd_double4 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC simd_double8 simd_mix(simd_double8 x, simd_double8 y, simd_double8 t) { + return x + t*(y - x); +} + +static inline SIMD_CFUNC float simd_recip(float x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_recip(simd_float2 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_recip(simd_float3 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float4 simd_recip(simd_float4 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_recip(simd_float8 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_recip(simd_float16 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC double simd_recip(double x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double2 simd_recip(simd_double2 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double3 simd_recip(simd_double3 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_double4 simd_recip(simd_double4 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static 
inline SIMD_CFUNC simd_double8 simd_recip(simd_double8 x) { +#if __FAST_MATH__ + return simd_fast_recip(x); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC float simd_fast_recip(float x) { +#if defined __AVX512VL__ + simd_float4 x4 = simd_make_float4(x); + return ((simd_float4)_mm_rcp14_ss(x4, x4)).x; +#elif defined __SSE__ + return ((simd_float4)_mm_rcp_ss(simd_make_float4(x))).x; +#elif defined __ARM_NEON__ + return simd_fast_recip(simd_make_float2_undef(x)).x; +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_fast_recip(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_fast_recip(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = vrecpe_f32(x); + return r * vrecps_f32(x, r); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_fast_recip(simd_float3 x) { + return simd_make_float3(simd_fast_recip(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_fast_recip(simd_float4 x) { +#if defined __AVX512VL__ + return _mm_rcp14_ps(x); +#elif defined __SSE__ + return _mm_rcp_ps(x); +#elif defined __ARM_NEON__ + simd_float4 r = vrecpeq_f32(x); + return r * vrecpsq_f32(x, r); +#else + return simd_precise_recip(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_fast_recip(simd_float8 x) { +#if defined __AVX512VL__ + return _mm256_rcp14_ps(x); +#elif defined __AVX__ + return _mm256_rcp_ps(x); +#else + return simd_make_float8(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_fast_recip(simd_float16 x) { +#if defined __AVX512F__ + return _mm512_rcp14_ps(x); +#else + return simd_make_float16(simd_fast_recip(x.lo), simd_fast_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_fast_recip(double x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double2 simd_fast_recip(simd_double2 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double3 simd_fast_recip(simd_double3 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double4 simd_fast_recip(simd_double4 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC simd_double8 simd_fast_recip(simd_double8 x) { + return simd_precise_recip(x); +} + +static inline SIMD_CFUNC float simd_precise_recip(float x) { +#if defined __SSE__ + float r = simd_fast_recip(x); + return r*(2 - (x == 0 ? 
-INFINITY : x)*r); +#elif defined __ARM_NEON__ + return simd_precise_recip(simd_make_float2_undef(x)).x; +#else + return 1/x; +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_precise_recip(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_precise_recip(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = simd_fast_recip(x); + return r*vrecps_f32(x, r); +#else + return 1/x; +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_precise_recip(simd_float3 x) { + return simd_make_float3(simd_precise_recip(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_precise_recip(simd_float4 x) { +#if defined __SSE__ + simd_float4 r = simd_fast_recip(x); + return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); +#elif defined __ARM_NEON__ + simd_float4 r = simd_fast_recip(x); + return r*vrecpsq_f32(x, r); +#else + return 1/x; +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_precise_recip(simd_float8 x) { +#if defined __AVX__ + simd_float8 r = simd_fast_recip(x); + return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); +#else + return simd_make_float8(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_precise_recip(simd_float16 x) { +#if defined __AVX512F__ + simd_float16 r = simd_fast_recip(x); + return r*(2 - simd_bitselect(x, -INFINITY, x == 0)*r); +#else + return simd_make_float16(simd_precise_recip(x.lo), simd_precise_recip(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_precise_recip(double x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double2 simd_precise_recip(simd_double2 x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double3 simd_precise_recip(simd_double3 x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double4 simd_precise_recip(simd_double4 x) { + return 1/x; +} + +static inline SIMD_CFUNC simd_double8 simd_precise_recip(simd_double8 x) { + return 1/x; +} + +static inline SIMD_CFUNC float simd_rsqrt(float x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_rsqrt(simd_float2 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_rsqrt(simd_float3 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float4 simd_rsqrt(simd_float4 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_rsqrt(simd_float8 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_rsqrt(simd_float16 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC double simd_rsqrt(double x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double2 simd_rsqrt(simd_double2 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double3 simd_rsqrt(simd_double3 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double4 simd_rsqrt(simd_double4 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); 
+#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_double8 simd_rsqrt(simd_double8 x) { +#if __FAST_MATH__ + return simd_fast_rsqrt(x); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC float simd_fast_rsqrt(float x) { +#if defined __AVX512VL__ + simd_float4 x4 = simd_make_float4(x); + return ((simd_float4)_mm_rsqrt14_ss(x4, x4)).x; +#elif defined __SSE__ + return ((simd_float4)_mm_rsqrt_ss(simd_make_float4(x))).x; +#elif defined __ARM_NEON__ + return simd_fast_rsqrt(simd_make_float2_undef(x)).x; +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_fast_rsqrt(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_fast_rsqrt(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = vrsqrte_f32(x); + return r * vrsqrts_f32(x, r*r); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_fast_rsqrt(simd_float3 x) { + return simd_make_float3(simd_fast_rsqrt(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_fast_rsqrt(simd_float4 x) { +#if defined __AVX512VL__ + return _mm_rsqrt14_ps(x); +#elif defined __SSE__ + return _mm_rsqrt_ps(x); +#elif defined __ARM_NEON__ + simd_float4 r = vrsqrteq_f32(x); + return r * vrsqrtsq_f32(x, r*r); +#else + return simd_precise_rsqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_fast_rsqrt(simd_float8 x) { +#if defined __AVX512VL__ + return _mm256_rsqrt14_ps(x); +#elif defined __AVX__ + return _mm256_rsqrt_ps(x); +#else + return simd_make_float8(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_fast_rsqrt(simd_float16 x) { +#if defined __AVX512F__ + return _mm512_rsqrt14_ps(x); +#else + return simd_make_float16(simd_fast_rsqrt(x.lo), simd_fast_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_fast_rsqrt(double x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double2 simd_fast_rsqrt(simd_double2 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double3 simd_fast_rsqrt(simd_double3 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double4 simd_fast_rsqrt(simd_double4 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC simd_double8 simd_fast_rsqrt(simd_double8 x) { + return simd_precise_rsqrt(x); +} + +static inline SIMD_CFUNC float simd_precise_rsqrt(float x) { +#if defined __SSE__ + float r = simd_fast_rsqrt(x); + return r*(1.5f - 0.5f*(r == INFINITY ? 
-INFINITY : x)*r*r); +#elif defined __ARM_NEON__ + return simd_precise_rsqrt(simd_make_float2_undef(x)).x; +#else + return 1/sqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float2 simd_precise_rsqrt(simd_float2 x) { +#if defined __SSE__ + return simd_make_float2(simd_precise_rsqrt(simd_make_float4_undef(x))); +#elif defined __ARM_NEON__ + simd_float2 r = simd_fast_rsqrt(x); + return r*vrsqrts_f32(x, r*r); +#else + return 1/__tg_sqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float3 simd_precise_rsqrt(simd_float3 x) { + return simd_make_float3(simd_precise_rsqrt(simd_make_float4_undef(x))); +} + +static inline SIMD_CFUNC simd_float4 simd_precise_rsqrt(simd_float4 x) { +#if defined __SSE__ + simd_float4 r = simd_fast_rsqrt(x); + return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); +#elif defined __ARM_NEON__ + simd_float4 r = simd_fast_rsqrt(x); + return r*vrsqrtsq_f32(x, r*r); +#else + return 1/__tg_sqrt(x); +#endif +} + +static inline SIMD_CFUNC simd_float8 simd_precise_rsqrt(simd_float8 x) { +#if defined __AVX__ + simd_float8 r = simd_fast_rsqrt(x); + return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); +#else + return simd_make_float8(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC simd_float16 simd_precise_rsqrt(simd_float16 x) { +#if defined __AVX512F__ + simd_float16 r = simd_fast_rsqrt(x); + return r*(1.5 - 0.5*simd_bitselect(x, -INFINITY, r == INFINITY)*r*r); +#else + return simd_make_float16(simd_precise_rsqrt(x.lo), simd_precise_rsqrt(x.hi)); +#endif +} + +static inline SIMD_CFUNC double simd_precise_rsqrt(double x) { + return 1/sqrt(x); +} + +static inline SIMD_CFUNC simd_double2 simd_precise_rsqrt(simd_double2 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC simd_double3 simd_precise_rsqrt(simd_double3 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC simd_double4 simd_precise_rsqrt(simd_double4 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC simd_double8 simd_precise_rsqrt(simd_double8 x) { + return 1/__tg_sqrt(x); +} + +static inline SIMD_CFUNC float simd_fract(float x) { + return fmin(x - floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float2 simd_fract(simd_float2 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float3 simd_fract(simd_float3 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float4 simd_fract(simd_float4 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float8 simd_fract(simd_float8 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC simd_float16 simd_fract(simd_float16 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffep-1f); +} + +static inline SIMD_CFUNC double simd_fract(double x) { + return fmin(x - floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double2 simd_fract(simd_double2 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double3 simd_fract(simd_double3 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double4 simd_fract(simd_double4 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC simd_double8 simd_fract(simd_double8 x) { + return __tg_fmin(x - __tg_floor(x), 0x1.fffffffffffffp-1); +} + +static inline SIMD_CFUNC float simd_step(float edge, float x) { + return !(x < 
edge); +} + +static inline SIMD_CFUNC simd_float2 simd_step(simd_float2 edge, simd_float2 x) { + return simd_bitselect((simd_float2)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float3 simd_step(simd_float3 edge, simd_float3 x) { + return simd_bitselect((simd_float3)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float4 simd_step(simd_float4 edge, simd_float4 x) { + return simd_bitselect((simd_float4)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float8 simd_step(simd_float8 edge, simd_float8 x) { + return simd_bitselect((simd_float8)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_float16 simd_step(simd_float16 edge, simd_float16 x) { + return simd_bitselect((simd_float16)1, 0, x < edge); +} + +static inline SIMD_CFUNC double simd_step(double edge, double x) { + return !(x < edge); +} + +static inline SIMD_CFUNC simd_double2 simd_step(simd_double2 edge, simd_double2 x) { + return simd_bitselect((simd_double2)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_double3 simd_step(simd_double3 edge, simd_double3 x) { + return simd_bitselect((simd_double3)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_double4 simd_step(simd_double4 edge, simd_double4 x) { + return simd_bitselect((simd_double4)1, 0, x < edge); +} + +static inline SIMD_CFUNC simd_double8 simd_step(simd_double8 edge, simd_double8 x) { + return simd_bitselect((simd_double8)1, 0, x < edge); +} + +static inline SIMD_CFUNC float simd_smoothstep(float edge0, float edge1, float x) { + float t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float2 simd_smoothstep(simd_float2 edge0, simd_float2 edge1, simd_float2 x) { + simd_float2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float3 simd_smoothstep(simd_float3 edge0, simd_float3 edge1, simd_float3 x) { + simd_float3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float4 simd_smoothstep(simd_float4 edge0, simd_float4 edge1, simd_float4 x) { + simd_float4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float8 simd_smoothstep(simd_float8 edge0, simd_float8 edge1, simd_float8 x) { + simd_float8 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_float16 simd_smoothstep(simd_float16 edge0, simd_float16 edge1, simd_float16 x) { + simd_float16 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC double simd_smoothstep(double edge0, double edge1, double x) { + double t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double2 simd_smoothstep(simd_double2 edge0, simd_double2 edge1, simd_double2 x) { + simd_double2 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double3 simd_smoothstep(simd_double3 edge0, simd_double3 edge1, simd_double3 x) { + simd_double3 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double4 simd_smoothstep(simd_double4 edge0, simd_double4 edge1, simd_double4 x) { + simd_double4 t = simd_clamp((x - edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC simd_double8 simd_smoothstep(simd_double8 edge0, simd_double8 edge1, simd_double8 x) { + simd_double8 t = simd_clamp((x - 
edge0)/(edge1 - edge0), 0, 1); + return t*t*(3 - 2*t); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_add(simd_char64 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_add(simd_uchar64 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC short simd_reduce_add(simd_short32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_add(simd_ushort32 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC int simd_reduce_add(simd_int16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint3 x) { + return x.x + 
x.y + x.z; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_add(simd_uint16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC float simd_reduce_add(simd_float16 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_add(simd_long8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_add(simd_ulong8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double2 x) { + return x.x + x.y; +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double3 x) { + return x.x + x.y + x.z; +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double4 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC double simd_reduce_add(simd_double8 x) { + return simd_reduce_add(x.lo + x.hi); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char3 x) { + char t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char16 x) { +#if defined __arm64__ + return vminvq_s8(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_min(simd_char64 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar3 x) { + unsigned char t = x.z < x.x ? x.z : x.x; + return x.y < t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar16 x) { +#if defined __arm64__ + return vminvq_u8(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_min(simd_uchar64 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short3 x) { + short t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short8 x) { +#if defined __arm64__ + return vminvq_s16(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_min(simd_short32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort3 x) { + unsigned short t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort8 x) { +#if defined __arm64__ + return vminvq_u16(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_min(simd_ushort32 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int3 x) { + int t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int4 x) { +#if defined __arm64__ + return vminvq_s32(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_min(simd_int16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint3 x) { + unsigned int t = x.z < x.x ? x.z : x.x; + return x.y < t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint4 x) { +#if defined __arm64__ + return vminvq_u32(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_min(simd_uint16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long3 x) { + simd_long1 t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_min(simd_long8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong2 x) { + return x.y < x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong3 x) { + simd_ulong1 t = x.z < x.x ? x.z : x.x; + return x.y < t ? x.y : t; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_min(simd_ulong8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float2 x) { + return fmin(x.x, x.y); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float3 x) { + return fmin(fmin(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float4 x) { +#if defined __arm64__ + return vminvq_f32(x); +#else + return simd_reduce_min(simd_min(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_min(simd_float16 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double2 x) { +#if defined __arm64__ + return vminvq_f64(x); +#else + return fmin(x.x, x.y); +#endif +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double3 x) { + return fmin(fmin(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double4 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_min(simd_double8 x) { + return simd_reduce_min(simd_min(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char3 x) { + char t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char16 x) { +#if defined __arm64__ + return vmaxvq_s8(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC char simd_reduce_max(simd_char64 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar2 x) { + return x.y > x.x ? 
x.y : x.x; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar3 x) { + unsigned char t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar16 x) { +#if defined __arm64__ + return vmaxvq_u8(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned char simd_reduce_max(simd_uchar64 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short3 x) { + short t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short8 x) { +#if defined __arm64__ + return vmaxvq_s16(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC short simd_reduce_max(simd_short32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort3 x) { + unsigned short t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort8 x) { +#if defined __arm64__ + return vmaxvq_u16(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned short simd_reduce_max(simd_ushort32 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int3 x) { + int t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int4 x) { +#if defined __arm64__ + return vmaxvq_s32(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC int simd_reduce_max(simd_int16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint3 x) { + unsigned int t = x.z > x.x ? x.z : x.x; + return x.y > t ? 
x.y : t; +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint4 x) { +#if defined __arm64__ + return vmaxvq_u32(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC unsigned int simd_reduce_max(simd_uint16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long3 x) { + simd_long1 t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_long1 simd_reduce_max(simd_long8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong2 x) { + return x.y > x.x ? x.y : x.x; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong3 x) { + simd_ulong1 t = x.z > x.x ? x.z : x.x; + return x.y > t ? x.y : t; +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC simd_ulong1 simd_reduce_max(simd_ulong8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float2 x) { + return fmax(x.x, x.y); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float3 x) { + return fmax(fmax(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float4 x) { +#if defined __arm64__ + return vmaxvq_f32(x); +#else + return simd_reduce_max(simd_max(x.lo, x.hi)); +#endif +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC float simd_reduce_max(simd_float16 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double2 x) { +#if defined __arm64__ + return vmaxvq_f64(x); +#else + return fmax(x.x, x.y); +#endif +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double3 x) { + return fmax(fmax(x.x, x.z), x.y); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double4 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +static inline SIMD_CFUNC double simd_reduce_max(simd_double8 x) { + return simd_reduce_max(simd_max(x.lo, x.hi)); +} + +#ifdef __cplusplus +} +#endif +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* SIMD_COMMON_HEADER */ diff --git a/vfsoverlay/conversion.h b/vfsoverlay/conversion.h new file mode 100644 index 00000000..235a56b8 --- /dev/null +++ b/vfsoverlay/conversion.h @@ -0,0 +1,2032 @@ +/* Copyright (c) 2014-2017 Apple, Inc. All rights reserved. + * + * The interfaces declared in this header provide conversions between vector + * types. The following functions are available: + * + * simd_char(x) simd_uchar(x) + * simd_short(x) simd_ushort(x) + * simd_int(x) simd_uint(x) + * simd_long(x) simd_ulong(x) + * simd_float(x) + * simd_double(x) + * + * Each of these functions converts x to a vector whose elements have the + * type named by the function, with the same number of elements as x. Unlike + * a vector cast, these functions convert the elements to the new element + * type. 
These conversions behave exactly as C scalar conversions, except
+ * that conversions from integer vector types to signed integer vector types
+ * are guaranteed to wrap modulo 2^N (where N is the number of bits in an
+ * element of the result type).
+ *
+ * For integer vector types, saturating conversions are also available:
+ *
+ *   simd_char_sat(x)   simd_uchar_sat(x)
+ *   simd_short_sat(x)  simd_ushort_sat(x)
+ *   simd_int_sat(x)    simd_uint_sat(x)
+ *   simd_long_sat(x)   simd_ulong_sat(x)
+ *
+ * These conversions clamp x to the representable range of the result type
+ * before converting.
+ *
+ * In C++ the conversion functions are templated in the simd:: namespace.
+ *
+ *   C++ Function                        Equivalent C Function
+ *   -------------------------------------------------------------------
+ *   simd::convert<ScalarType>(x)        simd_ScalarType(x)
+ *   simd::convert_sat<ScalarType>(x)    simd_ScalarType_sat(x)
+ */
+
+#ifndef __SIMD_CONVERSION_HEADER__
+#define __SIMD_CONVERSION_HEADER__
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/vector_types.h>
+#include <simd/common.h>
+#include <simd/logic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static simd_char2 SIMD_CFUNC simd_char(simd_char2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_char3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_char4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_char8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_char16 __x);
+static simd_char32 SIMD_CFUNC simd_char(simd_char32 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_uchar2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_uchar3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_uchar4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_uchar8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_uchar16 __x);
+static simd_char32 SIMD_CFUNC simd_char(simd_uchar32 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_short2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_short3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_short4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_short8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_short16 __x);
+static simd_char32 SIMD_CFUNC simd_char(simd_short32 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_ushort2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_ushort3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_ushort4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_ushort8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_ushort16 __x);
+static simd_char32 SIMD_CFUNC simd_char(simd_ushort32 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_int2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_int3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_int4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_int8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_int16 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_uint2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_uint3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_uint4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_uint8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_uint16 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_float2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_float3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_float4 __x);
+static simd_char8 SIMD_CFUNC simd_char(simd_float8 __x);
+static simd_char16 SIMD_CFUNC simd_char(simd_float16 __x);
+static simd_char2 SIMD_CFUNC simd_char(simd_long2 __x);
+static simd_char3 SIMD_CFUNC simd_char(simd_long3 __x);
+static simd_char4 SIMD_CFUNC simd_char(simd_long4 __x);
+static simd_char8 SIMD_CFUNC
simd_char(simd_long8 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_ulong2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_ulong3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_ulong4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_ulong8 __x); +static simd_char2 SIMD_CFUNC simd_char(simd_double2 __x); +static simd_char3 SIMD_CFUNC simd_char(simd_double3 __x); +static simd_char4 SIMD_CFUNC simd_char(simd_double4 __x); +static simd_char8 SIMD_CFUNC simd_char(simd_double8 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_char2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_char3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_char4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_char8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_char16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_char32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_short2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_short3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_short4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_short8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_short16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_short32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_int2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_int3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_int4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_int8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_int16 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_float2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_float3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_float4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_float8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_float16 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_long2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_long3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_long4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_long8 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_double2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_double3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_double4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_double8 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uchar2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uchar3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uchar4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uchar8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uchar16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_uchar32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ushort2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ushort3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ushort4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ushort8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_ushort16 __x); +static simd_char32 SIMD_CFUNC simd_char_sat(simd_ushort32 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uint2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uint3 __x); +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uint4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uint8 __x); +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uint16 __x); +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ulong2 __x); +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ulong3 __x); +static 
simd_char4 SIMD_CFUNC simd_char_sat(simd_ulong4 __x); +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ulong8 __x); +#define vector_char simd_char +#define vector_char_sat simd_char_sat + +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_char2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_char3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_char4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_char8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_char16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_char32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uchar2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uchar3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uchar4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uchar8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uchar16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_uchar32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_short2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_short3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_short4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_short8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_short16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_short32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ushort2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ushort3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ushort4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ushort8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_ushort16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_ushort32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_int2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_int3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_int4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_int8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_int16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uint2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uint3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uint4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uint8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uint16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_float2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_float3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_float4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_float8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_float16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_long2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_long3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_long4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_long8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ulong2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ulong3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ulong4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ulong8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_double2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_double3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_double4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_double8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_char2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_char3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_char4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_char8 __x); +static simd_uchar16 SIMD_CFUNC 
simd_uchar_sat(simd_char16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_char32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_short2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_short3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_short4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_short8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_short16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_short32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_int2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_int3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_int4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_int8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_int16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_float2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_float3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_float4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_float8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_float16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_long2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_long3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_long4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_long8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_double2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_double3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_double4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_double8 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uchar2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uchar3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uchar4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uchar8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uchar16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_uchar32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ushort2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ushort3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ushort4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ushort8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_ushort16 __x); +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_ushort32 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uint2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uint3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uint4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uint8 __x); +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uint16 __x); +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ulong2 __x); +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ulong3 __x); +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ulong4 __x); +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ulong8 __x); +#define vector_uchar simd_uchar +#define vector_uchar_sat simd_uchar_sat + +static simd_short2 SIMD_CFUNC simd_short(simd_char2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_char3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_char4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_char8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_char16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_char32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_uchar2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_uchar3 __x); +static 
simd_short4 SIMD_CFUNC simd_short(simd_uchar4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_uchar8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_uchar16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_uchar32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_short2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_short3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_short4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_short8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_short16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_short32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_ushort2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_ushort3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_ushort4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_ushort8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_ushort16 __x); +static simd_short32 SIMD_CFUNC simd_short(simd_ushort32 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_int2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_int3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_int4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_int8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_int16 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_uint2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_uint3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_uint4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_uint8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_uint16 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_float2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_float3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_float4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_float8 __x); +static simd_short16 SIMD_CFUNC simd_short(simd_float16 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_long2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_long3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_long4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_long8 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_ulong2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_ulong3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_ulong4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_ulong8 __x); +static simd_short2 SIMD_CFUNC simd_short(simd_double2 __x); +static simd_short3 SIMD_CFUNC simd_short(simd_double3 __x); +static simd_short4 SIMD_CFUNC simd_short(simd_double4 __x); +static simd_short8 SIMD_CFUNC simd_short(simd_double8 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_char2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_char3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_char4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_char8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_char16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_char32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_short2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_short3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_short4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_short8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_short16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_short32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_int2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_int3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_int4 __x); +static 
simd_short8 SIMD_CFUNC simd_short_sat(simd_int8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_int16 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_float2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_float3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_float4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_float8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_float16 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_long2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_long3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_long4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_long8 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_double2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_double3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_double4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_double8 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uchar2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uchar3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uchar4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uchar8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uchar16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_uchar32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ushort2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ushort3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ushort4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ushort8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_ushort16 __x); +static simd_short32 SIMD_CFUNC simd_short_sat(simd_ushort32 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uint2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uint3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uint4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uint8 __x); +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uint16 __x); +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ulong2 __x); +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ulong3 __x); +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ulong4 __x); +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ulong8 __x); +#define vector_short simd_short +#define vector_short_sat simd_short_sat + +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_char2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_char3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_char4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_char8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_char16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_char32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uchar2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uchar3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uchar4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uchar8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uchar16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_uchar32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_short2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_short3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_short4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_short8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_short16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_short32 __x); +static simd_ushort2 SIMD_CFUNC 
simd_ushort(simd_ushort2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ushort3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ushort4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ushort8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_ushort16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_ushort32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_int2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_int3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_int4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_int8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_int16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uint2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uint3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uint4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uint8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uint16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_float2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_float3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_float4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_float8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_float16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_long2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_long3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_long4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_long8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ulong2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ulong3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ulong4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ulong8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_double2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_double3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_double4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_double8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_char2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_char3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_char4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_char8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_char16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_char32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_short2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_short3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_short4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_short8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_short16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_short32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_int2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_int3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_int4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_int8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_int16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_float2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_float3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_float4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_float8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_float16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_long2 __x); +static simd_ushort3 
SIMD_CFUNC simd_ushort_sat(simd_long3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_long4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_long8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_double2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_double3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_double4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_double8 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uchar2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uchar3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uchar4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uchar8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uchar16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_uchar32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ushort2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ushort3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ushort4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ushort8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_ushort16 __x); +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_ushort32 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uint2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uint3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uint4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uint8 __x); +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uint16 __x); +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ulong2 __x); +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ulong3 __x); +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ulong4 __x); +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ulong8 __x); +#define vector_ushort simd_ushort +#define vector_ushort_sat simd_ushort_sat + +static simd_int2 SIMD_CFUNC simd_int(simd_char2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_char3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_char4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_char8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_char16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_uchar2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_uchar3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_uchar4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_uchar8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_uchar16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_short2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_short3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_short4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_short8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_short16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_ushort2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_ushort3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_ushort4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_ushort8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_ushort16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_int2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_int3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_int4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_int8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_int16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_uint2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_uint3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_uint4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_uint8 __x); +static 
simd_int16 SIMD_CFUNC simd_int(simd_uint16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_float2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_float3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_float4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_float8 __x); +static simd_int16 SIMD_CFUNC simd_int(simd_float16 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_long2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_long3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_long4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_long8 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_ulong2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_ulong3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_ulong4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_ulong8 __x); +static simd_int2 SIMD_CFUNC simd_int(simd_double2 __x); +static simd_int3 SIMD_CFUNC simd_int(simd_double3 __x); +static simd_int4 SIMD_CFUNC simd_int(simd_double4 __x); +static simd_int8 SIMD_CFUNC simd_int(simd_double8 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_char2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_char3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_char4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_char8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_char16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_short2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_short3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_short4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_short8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_short16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_int2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_int3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_int4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_int8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_int16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_float2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_float3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_float4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_float8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_float16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_long2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_long3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_long4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_long8 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_double2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_double3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_double4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_double8 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uchar2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_uchar3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uchar4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_uchar8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_uchar16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_ushort2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_ushort3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_ushort4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_ushort8 __x); +static simd_int16 SIMD_CFUNC simd_int_sat(simd_ushort16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uint2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_uint3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uint4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_uint8 __x); +static simd_int16 SIMD_CFUNC 
simd_int_sat(simd_uint16 __x); +static simd_int2 SIMD_CFUNC simd_int_sat(simd_ulong2 __x); +static simd_int3 SIMD_CFUNC simd_int_sat(simd_ulong3 __x); +static simd_int4 SIMD_CFUNC simd_int_sat(simd_ulong4 __x); +static simd_int8 SIMD_CFUNC simd_int_sat(simd_ulong8 __x); +static simd_int2 SIMD_CFUNC simd_int_rte(simd_float2 __x); +static simd_int3 SIMD_CFUNC simd_int_rte(simd_float3 __x); +static simd_int4 SIMD_CFUNC simd_int_rte(simd_float4 __x); +static simd_int8 SIMD_CFUNC simd_int_rte(simd_float8 __x); +static simd_int16 SIMD_CFUNC simd_int_rte(simd_float16 __x); +#define vector_int simd_int +#define vector_int_sat simd_int_sat + +static simd_uint2 SIMD_CFUNC simd_uint(simd_char2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_char3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_char4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_char8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_char16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_uchar2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_uchar3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_uchar4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_uchar8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_uchar16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_short2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_short3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_short4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_short8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_short16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_ushort2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_ushort3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_ushort4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_ushort8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_ushort16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_int2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_int3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_int4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_int8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_int16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_uint2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_uint3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_uint4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_uint8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_uint16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_float2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_float3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_float4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_float8 __x); +static simd_uint16 SIMD_CFUNC simd_uint(simd_float16 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_long2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_long3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_long4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_long8 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_ulong2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_ulong3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_ulong4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_ulong8 __x); +static simd_uint2 SIMD_CFUNC simd_uint(simd_double2 __x); +static simd_uint3 SIMD_CFUNC simd_uint(simd_double3 __x); +static simd_uint4 SIMD_CFUNC simd_uint(simd_double4 __x); +static simd_uint8 SIMD_CFUNC simd_uint(simd_double8 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_char2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_char3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_char4 __x); 
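/* Orientation for the conversion families repeated below for every element
 * type (an illustrative sketch, not part of the vendored Apple header):
 * the plain simd_TYPE(x) conversions truncate like C casts, with integer
 * narrowing keeping the low bits of each lane; the _sat variants clamp to
 * the destination range before narrowing; and, per the _rte naming, the
 * _rte variants round floating-point lanes to nearest-even first.
 *
 *   #include <simd/simd.h>
 *   simd_int2   x    = { 70000, -70000 };
 *   simd_short2 wrap = simd_short(x);     // { 4464, -4464 }: low 16 bits
 *   simd_short2 sat  = simd_short_sat(x); // { 32767, -32768 }: clamped
 */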
+static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_char8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_char16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_short2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_short3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_short4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_short8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_short16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_int2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_int3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_int4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_int8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_int16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_float2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_float3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_float4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_float8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_float16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_long2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_long3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_long4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_long8 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_double2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_double3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_double4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_double8 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uchar2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uchar3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uchar4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uchar8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uchar16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ushort2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ushort3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ushort4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ushort8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_ushort16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uint2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uint3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uint4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uint8 __x); +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uint16 __x); +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ulong2 __x); +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ulong3 __x); +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ulong4 __x); +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ulong8 __x); +#define vector_uint simd_uint +#define vector_uint_sat simd_uint_sat + +static simd_float2 SIMD_CFUNC simd_float(simd_char2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_char3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_char4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_char8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_char16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_uchar2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_uchar3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_uchar4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_uchar8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_uchar16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_short2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_short3 __x); +static simd_float4 SIMD_CFUNC 
simd_float(simd_short4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_short8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_short16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_ushort2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_ushort3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_ushort4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_ushort8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_ushort16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_int2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_int3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_int4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_int8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_int16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_uint2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_uint3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_uint4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_uint8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_uint16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_float2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_float3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_float4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_float8 __x); +static simd_float16 SIMD_CFUNC simd_float(simd_float16 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_long2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_long3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_long4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_long8 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_ulong2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_ulong3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_ulong4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_ulong8 __x); +static simd_float2 SIMD_CFUNC simd_float(simd_double2 __x); +static simd_float3 SIMD_CFUNC simd_float(simd_double3 __x); +static simd_float4 SIMD_CFUNC simd_float(simd_double4 __x); +static simd_float8 SIMD_CFUNC simd_float(simd_double8 __x); +#define vector_float simd_float + +static simd_long2 SIMD_CFUNC simd_long(simd_char2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_char3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_char4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_char8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_uchar2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_uchar3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_uchar4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_uchar8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_short2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_short3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_short4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_short8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_ushort2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_ushort3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_ushort4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_ushort8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_int2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_int3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_int4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_int8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_uint2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_uint3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_uint4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_uint8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_float2 __x); +static 
simd_long3 SIMD_CFUNC simd_long(simd_float3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_float4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_float8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_long2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_long3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_long4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_long8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_ulong2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_ulong3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_ulong4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_ulong8 __x); +static simd_long2 SIMD_CFUNC simd_long(simd_double2 __x); +static simd_long3 SIMD_CFUNC simd_long(simd_double3 __x); +static simd_long4 SIMD_CFUNC simd_long(simd_double4 __x); +static simd_long8 SIMD_CFUNC simd_long(simd_double8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_char2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_char3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_char4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_char8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_short2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_short3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_short4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_short8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_int2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_int3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_int4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_int8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_float2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_float3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_float4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_float8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_long2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_long3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_long4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_long8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_double2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_double3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_double4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_double8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_uchar2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_uchar3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_uchar4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_uchar8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_ushort2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_ushort3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_ushort4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_ushort8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_uint2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_uint3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_uint4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_uint8 __x); +static simd_long2 SIMD_CFUNC simd_long_sat(simd_ulong2 __x); +static simd_long3 SIMD_CFUNC simd_long_sat(simd_ulong3 __x); +static simd_long4 SIMD_CFUNC simd_long_sat(simd_ulong4 __x); +static simd_long8 SIMD_CFUNC simd_long_sat(simd_ulong8 __x); +static simd_long2 SIMD_CFUNC simd_long_rte(simd_double2 __x); +static simd_long3 SIMD_CFUNC simd_long_rte(simd_double3 __x); +static simd_long4 SIMD_CFUNC simd_long_rte(simd_double4 __x); +static simd_long8 SIMD_CFUNC 
simd_long_rte(simd_double8 __x); +#define vector_long simd_long +#define vector_long_sat simd_long_sat + +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_char2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_char3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_char4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_char8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uchar2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uchar3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uchar4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uchar8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_short2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_short3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_short4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_short8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ushort2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ushort3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ushort4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ushort8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_int2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_int3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_int4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_int8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uint2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uint3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uint4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uint8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_float2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_float3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_float4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_float8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_long2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_long3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_long4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_long8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ulong2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ulong3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ulong4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ulong8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_double2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_double3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_double4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_double8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_char2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_char3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_char4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_char8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_short2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_short3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_short4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_short8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_int2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_int3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_int4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_int8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_float2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_float3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_float4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_float8 __x); +static simd_ulong2 SIMD_CFUNC 
simd_ulong_sat(simd_long2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_long3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_long4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_long8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_double2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_double3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_double4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_double8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uchar2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uchar3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uchar4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uchar8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ushort2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ushort3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ushort4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ushort8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uint2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uint3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uint4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uint8 __x); +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ulong2 __x); +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ulong3 __x); +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ulong4 __x); +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ulong8 __x); +#define vector_ulong simd_ulong +#define vector_ulong_sat simd_ulong_sat + +static simd_double2 SIMD_CFUNC simd_double(simd_char2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_char3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_char4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_char8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_uchar2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_uchar3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_uchar4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_uchar8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_short2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_short3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_short4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_short8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_ushort2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_ushort3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_ushort4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_ushort8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_int2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_int3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_int4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_int8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_uint2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_uint3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_uint4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_uint8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_float2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_float3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_float4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_float8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_long2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_long3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_long4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_long8 __x); 
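/* A common round-trip these conversions support (an illustrative sketch,
 * not from the header itself): widen integer lanes to floating point, do
 * the arithmetic, then narrow back with saturation so out-of-range results
 * clamp to the lane bounds instead of wrapping.
 *
 *   #include <simd/simd.h>
 *   simd_uchar4 px  = { 0, 128, 200, 255 };
 *   simd_float4 f   = simd_float(px) * 1.5f; // { 0, 192, 300, 382.5 }
 *   simd_uchar4 out = simd_uchar_sat(f);     // { 0, 192, 255, 255 }
 */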
+static simd_double2 SIMD_CFUNC simd_double(simd_ulong2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_ulong3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_ulong4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_ulong8 __x); +static simd_double2 SIMD_CFUNC simd_double(simd_double2 __x); +static simd_double3 SIMD_CFUNC simd_double(simd_double3 __x); +static simd_double4 SIMD_CFUNC simd_double(simd_double4 __x); +static simd_double8 SIMD_CFUNC simd_double(simd_double8 __x); +#define vector_double simd_double + +static simd_char2 SIMD_CFUNC vector2(char __x, char __y) { return ( simd_char2){__x, __y}; } +static simd_uchar2 SIMD_CFUNC vector2(unsigned char __x, unsigned char __y) { return ( simd_uchar2){__x, __y}; } +static simd_short2 SIMD_CFUNC vector2(short __x, short __y) { return ( simd_short2){__x, __y}; } +static simd_ushort2 SIMD_CFUNC vector2(unsigned short __x, unsigned short __y) { return (simd_ushort2){__x, __y}; } +static simd_int2 SIMD_CFUNC vector2(int __x, int __y) { return ( simd_int2){__x, __y}; } +static simd_uint2 SIMD_CFUNC vector2(unsigned int __x, unsigned int __y) { return ( simd_uint2){__x, __y}; } +static simd_float2 SIMD_CFUNC vector2(float __x, float __y) { return ( simd_float2){__x, __y}; } +static simd_long2 SIMD_CFUNC vector2(simd_long1 __x, simd_long1 __y) { return ( simd_long2){__x, __y}; } +static simd_ulong2 SIMD_CFUNC vector2(simd_ulong1 __x, simd_ulong1 __y) { return ( simd_ulong2){__x, __y}; } +static simd_double2 SIMD_CFUNC vector2(double __x, double __y) { return (simd_double2){__x, __y}; } + +static simd_char3 SIMD_CFUNC vector3(char __x, char __y, char __z) { return ( simd_char3){__x, __y, __z}; } +static simd_uchar3 SIMD_CFUNC vector3(unsigned char __x, unsigned char __y, unsigned char __z) { return ( simd_uchar3){__x, __y, __z}; } +static simd_short3 SIMD_CFUNC vector3(short __x, short __y, short __z) { return ( simd_short3){__x, __y, __z}; } +static simd_ushort3 SIMD_CFUNC vector3(unsigned short __x, unsigned short __y, unsigned short __z) { return (simd_ushort3){__x, __y, __z}; } +static simd_int3 SIMD_CFUNC vector3(int __x, int __y, int __z) { return ( simd_int3){__x, __y, __z}; } +static simd_uint3 SIMD_CFUNC vector3(unsigned int __x, unsigned int __y, unsigned int __z) { return ( simd_uint3){__x, __y, __z}; } +static simd_float3 SIMD_CFUNC vector3(float __x, float __y, float __z) { return ( simd_float3){__x, __y, __z}; } +static simd_long3 SIMD_CFUNC vector3(simd_long1 __x, simd_long1 __y, simd_long1 __z) { return ( simd_long3){__x, __y, __z}; } +static simd_ulong3 SIMD_CFUNC vector3(simd_ulong1 __x, simd_ulong1 __y, simd_ulong1 __z) { return ( simd_ulong3){__x, __y, __z}; } +static simd_double3 SIMD_CFUNC vector3(double __x, double __y, double __z) { return (simd_double3){__x, __y, __z}; } + +static simd_char3 SIMD_CFUNC vector3(simd_char2 __xy, char __z) { simd_char3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_uchar3 SIMD_CFUNC vector3(simd_uchar2 __xy, unsigned char __z) { simd_uchar3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_short3 SIMD_CFUNC vector3(simd_short2 __xy, short __z) { simd_short3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_ushort3 SIMD_CFUNC vector3(simd_ushort2 __xy, unsigned short __z) { simd_ushort3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_int3 SIMD_CFUNC vector3(simd_int2 __xy, int __z) { simd_int3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_uint3 SIMD_CFUNC vector3(simd_uint2 __xy, unsigned int __z) { simd_uint3 __r; 
__r.xy = __xy; __r.z = __z; return __r; } +static simd_float3 SIMD_CFUNC vector3(simd_float2 __xy, float __z) { simd_float3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_long3 SIMD_CFUNC vector3(simd_long2 __xy, simd_long1 __z) { simd_long3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_ulong3 SIMD_CFUNC vector3(simd_ulong2 __xy, simd_ulong1 __z) { simd_ulong3 __r; __r.xy = __xy; __r.z = __z; return __r; } +static simd_double3 SIMD_CFUNC vector3(simd_double2 __xy, double __z) { simd_double3 __r; __r.xy = __xy; __r.z = __z; return __r; } + +static simd_char4 SIMD_CFUNC vector4(char __x, char __y, char __z, char __w) { return ( simd_char4){__x, __y, __z, __w}; } +static simd_uchar4 SIMD_CFUNC vector4(unsigned char __x, unsigned char __y, unsigned char __z, unsigned char __w) { return ( simd_uchar4){__x, __y, __z, __w}; } +static simd_short4 SIMD_CFUNC vector4(short __x, short __y, short __z, short __w) { return ( simd_short4){__x, __y, __z, __w}; } +static simd_ushort4 SIMD_CFUNC vector4(unsigned short __x, unsigned short __y, unsigned short __z, unsigned short __w) { return (simd_ushort4){__x, __y, __z, __w}; } +static simd_int4 SIMD_CFUNC vector4(int __x, int __y, int __z, int __w) { return ( simd_int4){__x, __y, __z, __w}; } +static simd_uint4 SIMD_CFUNC vector4(unsigned int __x, unsigned int __y, unsigned int __z, unsigned int __w) { return ( simd_uint4){__x, __y, __z, __w}; } +static simd_float4 SIMD_CFUNC vector4(float __x, float __y, float __z, float __w) { return ( simd_float4){__x, __y, __z, __w}; } +static simd_long4 SIMD_CFUNC vector4(simd_long1 __x, simd_long1 __y, simd_long1 __z, simd_long1 __w) { return ( simd_long4){__x, __y, __z, __w}; } +static simd_ulong4 SIMD_CFUNC vector4(simd_ulong1 __x, simd_ulong1 __y, simd_ulong1 __z, simd_ulong1 __w) { return ( simd_ulong4){__x, __y, __z, __w}; } +static simd_double4 SIMD_CFUNC vector4(double __x, double __y, double __z, double __w) { return (simd_double4){__x, __y, __z, __w}; } + +static simd_char4 SIMD_CFUNC vector4(simd_char2 __xy, simd_char2 __zw) { simd_char4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_uchar4 SIMD_CFUNC vector4(simd_uchar2 __xy, simd_uchar2 __zw) { simd_uchar4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_short4 SIMD_CFUNC vector4(simd_short2 __xy, simd_short2 __zw) { simd_short4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_ushort4 SIMD_CFUNC vector4(simd_ushort2 __xy, simd_ushort2 __zw) { simd_ushort4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_int4 SIMD_CFUNC vector4(simd_int2 __xy, simd_int2 __zw) { simd_int4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_uint4 SIMD_CFUNC vector4(simd_uint2 __xy, simd_uint2 __zw) { simd_uint4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_float4 SIMD_CFUNC vector4(simd_float2 __xy, simd_float2 __zw) { simd_float4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_long4 SIMD_CFUNC vector4(simd_long2 __xy, simd_long2 __zw) { simd_long4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_ulong4 SIMD_CFUNC vector4(simd_ulong2 __xy, simd_ulong2 __zw) { simd_ulong4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } +static simd_double4 SIMD_CFUNC vector4(simd_double2 __xy, simd_double2 __zw) { simd_double4 __r; __r.xy = __xy; __r.zw = __zw; return __r; } + +static simd_char4 SIMD_CFUNC vector4(simd_char3 __xyz, char __w) { simd_char4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_uchar4 SIMD_CFUNC vector4(simd_uchar3 __xyz, 
unsigned char __w) { simd_uchar4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_short4 SIMD_CFUNC vector4(simd_short3 __xyz, short __w) { simd_short4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_ushort4 SIMD_CFUNC vector4(simd_ushort3 __xyz, unsigned short __w) { simd_ushort4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_int4 SIMD_CFUNC vector4(simd_int3 __xyz, int __w) { simd_int4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_uint4 SIMD_CFUNC vector4(simd_uint3 __xyz, unsigned int __w) { simd_uint4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_float4 SIMD_CFUNC vector4(simd_float3 __xyz, float __w) { simd_float4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_long4 SIMD_CFUNC vector4(simd_long3 __xyz, simd_long1 __w) { simd_long4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_ulong4 SIMD_CFUNC vector4(simd_ulong3 __xyz, simd_ulong1 __w) { simd_ulong4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } +static simd_double4 SIMD_CFUNC vector4(simd_double3 __xyz, double __w) { simd_double4 __r; __r.xyz = __xyz; __r.w = __w; return __r; } + +static simd_char8 SIMD_CFUNC vector8(simd_char4 __lo, simd_char4 __hi) { simd_char8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uchar8 SIMD_CFUNC vector8(simd_uchar4 __lo, simd_uchar4 __hi) { simd_uchar8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_short8 SIMD_CFUNC vector8(simd_short4 __lo, simd_short4 __hi) { simd_short8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ushort8 SIMD_CFUNC vector8(simd_ushort4 __lo, simd_ushort4 __hi) { simd_ushort8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_int8 SIMD_CFUNC vector8(simd_int4 __lo, simd_int4 __hi) { simd_int8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uint8 SIMD_CFUNC vector8(simd_uint4 __lo, simd_uint4 __hi) { simd_uint8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_float8 SIMD_CFUNC vector8(simd_float4 __lo, simd_float4 __hi) { simd_float8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_long8 SIMD_CFUNC vector8(simd_long4 __lo, simd_long4 __hi) { simd_long8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ulong8 SIMD_CFUNC vector8(simd_ulong4 __lo, simd_ulong4 __hi) { simd_ulong8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_double8 SIMD_CFUNC vector8(simd_double4 __lo, simd_double4 __hi) { simd_double8 __r; __r.lo = __lo; __r.hi = __hi; return __r; } + +static simd_char16 SIMD_CFUNC vector16(simd_char8 __lo, simd_char8 __hi) { simd_char16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uchar16 SIMD_CFUNC vector16(simd_uchar8 __lo, simd_uchar8 __hi) { simd_uchar16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_short16 SIMD_CFUNC vector16(simd_short8 __lo, simd_short8 __hi) { simd_short16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ushort16 SIMD_CFUNC vector16(simd_ushort8 __lo, simd_ushort8 __hi) { simd_ushort16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_int16 SIMD_CFUNC vector16(simd_int8 __lo, simd_int8 __hi) { simd_int16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uint16 SIMD_CFUNC vector16(simd_uint8 __lo, simd_uint8 __hi) { simd_uint16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_float16 SIMD_CFUNC vector16(simd_float8 __lo, simd_float8 __hi) { simd_float16 __r; __r.lo = __lo; __r.hi = __hi; return __r; } + +static simd_char32 SIMD_CFUNC 
vector32(simd_char16 __lo, simd_char16 __hi) { simd_char32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_uchar32 SIMD_CFUNC vector32(simd_uchar16 __lo, simd_uchar16 __hi) { simd_uchar32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_short32 SIMD_CFUNC vector32(simd_short16 __lo, simd_short16 __hi) { simd_short32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } +static simd_ushort32 SIMD_CFUNC vector32(simd_ushort16 __lo, simd_ushort16 __hi) { simd_ushort32 __r; __r.lo = __lo; __r.hi = __hi; return __r; } + +#pragma mark - Implementation + +static simd_char2 SIMD_CFUNC simd_char(simd_char2 __x) { return __x; } +static simd_char3 SIMD_CFUNC simd_char(simd_char3 __x) { return __x; } +static simd_char4 SIMD_CFUNC simd_char(simd_char4 __x) { return __x; } +static simd_char8 SIMD_CFUNC simd_char(simd_char8 __x) { return __x; } +static simd_char16 SIMD_CFUNC simd_char(simd_char16 __x) { return __x; } +static simd_char32 SIMD_CFUNC simd_char(simd_char32 __x) { return __x; } +static simd_char2 SIMD_CFUNC simd_char(simd_uchar2 __x) { return (simd_char2)__x; } +static simd_char3 SIMD_CFUNC simd_char(simd_uchar3 __x) { return (simd_char3)__x; } +static simd_char4 SIMD_CFUNC simd_char(simd_uchar4 __x) { return (simd_char4)__x; } +static simd_char8 SIMD_CFUNC simd_char(simd_uchar8 __x) { return (simd_char8)__x; } +static simd_char16 SIMD_CFUNC simd_char(simd_uchar16 __x) { return (simd_char16)__x; } +static simd_char32 SIMD_CFUNC simd_char(simd_uchar32 __x) { return (simd_char32)__x; } +static simd_char2 SIMD_CFUNC simd_char(simd_short2 __x) { return __builtin_convertvector(__x & 0xff, simd_char2); } +static simd_char3 SIMD_CFUNC simd_char(simd_short3 __x) { return __builtin_convertvector(__x & 0xff, simd_char3); } +static simd_char4 SIMD_CFUNC simd_char(simd_short4 __x) { return __builtin_convertvector(__x & 0xff, simd_char4); } +static simd_char8 SIMD_CFUNC simd_char(simd_short8 __x) { return __builtin_convertvector(__x & 0xff, simd_char8); } +static simd_char16 SIMD_CFUNC simd_char(simd_short16 __x) { return __builtin_convertvector(__x & 0xff, simd_char16); } +static simd_char32 SIMD_CFUNC simd_char(simd_short32 __x) { return __builtin_convertvector(__x & 0xff, simd_char32); } +static simd_char2 SIMD_CFUNC simd_char(simd_ushort2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_ushort3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_ushort4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_ushort8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_ushort16 __x) { return simd_char(simd_short(__x)); } +static simd_char32 SIMD_CFUNC simd_char(simd_ushort32 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_int2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_int3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_int4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_int8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_int16 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_uint2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_uint3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_uint4 __x) { return 
simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_uint8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_uint16 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_float2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_float3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_float4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_float8 __x) { return simd_char(simd_short(__x)); } +static simd_char16 SIMD_CFUNC simd_char(simd_float16 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_long2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_long3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_long4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_long8 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_ulong2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_ulong3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_ulong4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_ulong8 __x) { return simd_char(simd_short(__x)); } +static simd_char2 SIMD_CFUNC simd_char(simd_double2 __x) { return simd_char(simd_short(__x)); } +static simd_char3 SIMD_CFUNC simd_char(simd_double3 __x) { return simd_char(simd_short(__x)); } +static simd_char4 SIMD_CFUNC simd_char(simd_double4 __x) { return simd_char(simd_short(__x)); } +static simd_char8 SIMD_CFUNC simd_char(simd_double8 __x) { return simd_char(simd_short(__x)); } + +static simd_char2 SIMD_CFUNC simd_char_sat(simd_char2 __x) { return __x; } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_char3 __x) { return __x; } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_char4 __x) { return __x; } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_char8 __x) { return __x; } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_char16 __x) { return __x; } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_char32 __x) { return __x; } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_short2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_short3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_short4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_short8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_short16 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_short32 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_int2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_int3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_int4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_int8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_int16 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 
SIMD_CFUNC simd_char_sat(simd_float2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_float3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_float4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_float8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_float16 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_long2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_long3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_long4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_long8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_double2 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_double3 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_double4 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_double8 __x) { return simd_char(simd_clamp(__x,-0x80,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uchar2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uchar3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uchar4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uchar8 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uchar16 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_uchar32 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ushort2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ushort3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ushort4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ushort8 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_ushort16 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char32 SIMD_CFUNC simd_char_sat(simd_ushort32 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_uint2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_uint3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_uint4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_uint8 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char16 SIMD_CFUNC simd_char_sat(simd_uint16 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char2 SIMD_CFUNC simd_char_sat(simd_ulong2 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char3 SIMD_CFUNC simd_char_sat(simd_ulong3 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char4 SIMD_CFUNC simd_char_sat(simd_ulong4 __x) { return simd_char(simd_min(__x,0x7f)); } +static simd_char8 SIMD_CFUNC simd_char_sat(simd_ulong8 
__x) { return simd_char(simd_min(__x,0x7f)); } + + +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_char2 __x) { return (simd_uchar2)__x; } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_char3 __x) { return (simd_uchar3)__x; } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_char4 __x) { return (simd_uchar4)__x; } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_char8 __x) { return (simd_uchar8)__x; } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_char16 __x) { return (simd_uchar16)__x; } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_char32 __x) { return (simd_uchar32)__x; } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uchar2 __x) { return __x; } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uchar3 __x) { return __x; } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uchar4 __x) { return __x; } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uchar8 __x) { return __x; } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uchar16 __x) { return __x; } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_uchar32 __x) { return __x; } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_short2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_short3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_short4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_short8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_short16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_short32 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ushort2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ushort3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ushort4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ushort8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_ushort16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar32 SIMD_CFUNC simd_uchar(simd_ushort32 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_int2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_int3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_int4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_int8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_int16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_uint2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_uint3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_uint4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_uint8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_uint16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_float2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_float3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_float4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_float8 __x) { return 
simd_uchar(simd_char(__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar(simd_float16 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_long2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_long3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_long4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_long8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_ulong2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_ulong3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_ulong4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_ulong8 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar(simd_double2 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar(simd_double3 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar(simd_double4 __x) { return simd_uchar(simd_char(__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar(simd_double8 __x) { return simd_uchar(simd_char(__x)); } + +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_char2 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_char3 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_char4 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_char8 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_char16 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_char32 __x) { return simd_uchar(simd_max(0,__x)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_short2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_short3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_short4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_short8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_short16 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_short32 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_int2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_int3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_int4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_int8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_int16 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_float2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_float3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_float4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_float8 __x) { return 
simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_float16 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_long2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_long3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_long4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_long8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_double2 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_double3 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_double4 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_double8 __x) { return simd_uchar(simd_clamp(__x,0,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uchar2 __x) { return __x; } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uchar3 __x) { return __x; } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uchar4 __x) { return __x; } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uchar8 __x) { return __x; } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uchar16 __x) { return __x; } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_uchar32 __x) { return __x; } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ushort2 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ushort3 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ushort4 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ushort8 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_ushort16 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar32 SIMD_CFUNC simd_uchar_sat(simd_ushort32 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_uint2 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_uint3 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_uint4 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_uint8 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar16 SIMD_CFUNC simd_uchar_sat(simd_uint16 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar2 SIMD_CFUNC simd_uchar_sat(simd_ulong2 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar3 SIMD_CFUNC simd_uchar_sat(simd_ulong3 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar4 SIMD_CFUNC simd_uchar_sat(simd_ulong4 __x) { return simd_uchar(simd_min(__x,0xff)); } +static simd_uchar8 SIMD_CFUNC simd_uchar_sat(simd_ulong8 __x) { return simd_uchar(simd_min(__x,0xff)); } + + +static simd_short2 SIMD_CFUNC simd_short(simd_char2 __x) { return __builtin_convertvector(__x, simd_short2); } +static simd_short3 SIMD_CFUNC simd_short(simd_char3 __x) { return __builtin_convertvector(__x, simd_short3); } +static simd_short4 SIMD_CFUNC simd_short(simd_char4 __x) { return __builtin_convertvector(__x, simd_short4); } +static simd_short8 SIMD_CFUNC simd_short(simd_char8 __x) { return __builtin_convertvector(__x, simd_short8); 
} +static simd_short16 SIMD_CFUNC simd_short(simd_char16 __x) { return __builtin_convertvector(__x, simd_short16); } +static simd_short32 SIMD_CFUNC simd_short(simd_char32 __x) { return __builtin_convertvector(__x, simd_short32); } +static simd_short2 SIMD_CFUNC simd_short(simd_uchar2 __x) { return __builtin_convertvector(__x, simd_short2); } +static simd_short3 SIMD_CFUNC simd_short(simd_uchar3 __x) { return __builtin_convertvector(__x, simd_short3); } +static simd_short4 SIMD_CFUNC simd_short(simd_uchar4 __x) { return __builtin_convertvector(__x, simd_short4); } +static simd_short8 SIMD_CFUNC simd_short(simd_uchar8 __x) { return __builtin_convertvector(__x, simd_short8); } +static simd_short16 SIMD_CFUNC simd_short(simd_uchar16 __x) { return __builtin_convertvector(__x, simd_short16); } +static simd_short32 SIMD_CFUNC simd_short(simd_uchar32 __x) { return __builtin_convertvector(__x, simd_short32); } +static simd_short2 SIMD_CFUNC simd_short(simd_short2 __x) { return __x; } +static simd_short3 SIMD_CFUNC simd_short(simd_short3 __x) { return __x; } +static simd_short4 SIMD_CFUNC simd_short(simd_short4 __x) { return __x; } +static simd_short8 SIMD_CFUNC simd_short(simd_short8 __x) { return __x; } +static simd_short16 SIMD_CFUNC simd_short(simd_short16 __x) { return __x; } +static simd_short32 SIMD_CFUNC simd_short(simd_short32 __x) { return __x; } +static simd_short2 SIMD_CFUNC simd_short(simd_ushort2 __x) { return (simd_short2)__x; } +static simd_short3 SIMD_CFUNC simd_short(simd_ushort3 __x) { return (simd_short3)__x; } +static simd_short4 SIMD_CFUNC simd_short(simd_ushort4 __x) { return (simd_short4)__x; } +static simd_short8 SIMD_CFUNC simd_short(simd_ushort8 __x) { return (simd_short8)__x; } +static simd_short16 SIMD_CFUNC simd_short(simd_ushort16 __x) { return (simd_short16)__x; } +static simd_short32 SIMD_CFUNC simd_short(simd_ushort32 __x) { return (simd_short32)__x; } +static simd_short2 SIMD_CFUNC simd_short(simd_int2 __x) { return __builtin_convertvector(__x & 0xffff, simd_short2); } +static simd_short3 SIMD_CFUNC simd_short(simd_int3 __x) { return __builtin_convertvector(__x & 0xffff, simd_short3); } +static simd_short4 SIMD_CFUNC simd_short(simd_int4 __x) { return __builtin_convertvector(__x & 0xffff, simd_short4); } +static simd_short8 SIMD_CFUNC simd_short(simd_int8 __x) { return __builtin_convertvector(__x & 0xffff, simd_short8); } +static simd_short16 SIMD_CFUNC simd_short(simd_int16 __x) { return __builtin_convertvector(__x & 0xffff, simd_short16); } +static simd_short2 SIMD_CFUNC simd_short(simd_uint2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_uint3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_uint4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_uint8 __x) { return simd_short(simd_int(__x)); } +static simd_short16 SIMD_CFUNC simd_short(simd_uint16 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_float2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_float3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_float4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_float8 __x) { return simd_short(simd_int(__x)); } +static simd_short16 SIMD_CFUNC simd_short(simd_float16 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_long2 __x) { return 
simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_long3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_long4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_long8 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_ulong2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_ulong3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_ulong4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_ulong8 __x) { return simd_short(simd_int(__x)); } +static simd_short2 SIMD_CFUNC simd_short(simd_double2 __x) { return simd_short(simd_int(__x)); } +static simd_short3 SIMD_CFUNC simd_short(simd_double3 __x) { return simd_short(simd_int(__x)); } +static simd_short4 SIMD_CFUNC simd_short(simd_double4 __x) { return simd_short(simd_int(__x)); } +static simd_short8 SIMD_CFUNC simd_short(simd_double8 __x) { return simd_short(simd_int(__x)); } + +static simd_short2 SIMD_CFUNC simd_short_sat(simd_char2 __x) { return simd_short(__x); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_char3 __x) { return simd_short(__x); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_char4 __x) { return simd_short(__x); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_char8 __x) { return simd_short(__x); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_char16 __x) { return simd_short(__x); } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_char32 __x) { return simd_short(__x); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_short2 __x) { return __x; } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_short3 __x) { return __x; } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_short4 __x) { return __x; } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_short8 __x) { return __x; } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_short16 __x) { return __x; } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_short32 __x) { return __x; } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_int2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_int3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_int4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_int8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_int16 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_float2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_float3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_float4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_float8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_float16 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_long2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_long3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 
SIMD_CFUNC simd_short_sat(simd_long4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_long8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_double2 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_double3 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_double4 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_double8 __x) { return simd_short(simd_clamp(__x,-0x8000,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uchar2 __x) { return simd_short(__x); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uchar3 __x) { return simd_short(__x); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uchar4 __x) { return simd_short(__x); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uchar8 __x) { return simd_short(__x); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uchar16 __x) { return simd_short(__x); } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_uchar32 __x) { return simd_short(__x); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ushort2 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ushort3 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ushort4 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ushort8 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_ushort16 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short32 SIMD_CFUNC simd_short_sat(simd_ushort32 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_uint2 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_uint3 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_uint4 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_uint8 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short16 SIMD_CFUNC simd_short_sat(simd_uint16 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short2 SIMD_CFUNC simd_short_sat(simd_ulong2 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short3 SIMD_CFUNC simd_short_sat(simd_ulong3 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short4 SIMD_CFUNC simd_short_sat(simd_ulong4 __x) { return simd_short(simd_min(__x,0x7fff)); } +static simd_short8 SIMD_CFUNC simd_short_sat(simd_ulong8 __x) { return simd_short(simd_min(__x,0x7fff)); } + + +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_char2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_char3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_char4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_char8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_char16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_char32 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uchar2 __x) { return 
simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uchar3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uchar4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uchar8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uchar16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_uchar32 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_short2 __x) { return (simd_ushort2)__x; } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_short3 __x) { return (simd_ushort3)__x; } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_short4 __x) { return (simd_ushort4)__x; } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_short8 __x) { return (simd_ushort8)__x; } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_short16 __x) { return (simd_ushort16)__x; } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_short32 __x) { return (simd_ushort32)__x; } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ushort2 __x) { return __x; } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ushort3 __x) { return __x; } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ushort4 __x) { return __x; } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ushort8 __x) { return __x; } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_ushort16 __x) { return __x; } +static simd_ushort32 SIMD_CFUNC simd_ushort(simd_ushort32 __x) { return __x; } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_int2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_int3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_int4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_int8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_int16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_uint2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_uint3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_uint4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_uint8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_uint16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_float2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_float3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_float4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_float8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort16 SIMD_CFUNC simd_ushort(simd_float16 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_long2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_long3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_long4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_long8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_ulong2 
__x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_ulong3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_ulong4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_ulong8 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort2 SIMD_CFUNC simd_ushort(simd_double2 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort3 SIMD_CFUNC simd_ushort(simd_double3 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort4 SIMD_CFUNC simd_ushort(simd_double4 __x) { return simd_ushort(simd_short(__x)); } +static simd_ushort8 SIMD_CFUNC simd_ushort(simd_double8 __x) { return simd_ushort(simd_short(__x)); } + +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_char2 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_char3 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_char4 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_char8 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_char16 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_char32 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_short2 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_short3 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_short4 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_short8 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_short16 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_short32 __x) { return simd_ushort(simd_max(__x, 0)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_int2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_int3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_int4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_int8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_int16 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_float2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_float3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_float4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_float8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_float16 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_long2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_long3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_long4 __x) { return simd_ushort(simd_clamp(__x, 0, 
0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_long8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_double2 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_double3 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_double4 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_double8 __x) { return simd_ushort(simd_clamp(__x, 0, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uchar2 __x) { return simd_ushort(__x); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uchar3 __x) { return simd_ushort(__x); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uchar4 __x) { return simd_ushort(__x); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uchar8 __x) { return simd_ushort(__x); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uchar16 __x) { return simd_ushort(__x); } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_uchar32 __x) { return simd_ushort(__x); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ushort2 __x) { return __x; } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ushort3 __x) { return __x; } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ushort4 __x) { return __x; } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ushort8 __x) { return __x; } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_ushort16 __x) { return __x; } +static simd_ushort32 SIMD_CFUNC simd_ushort_sat(simd_ushort32 __x) { return __x; } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_uint2 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_uint3 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_uint4 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_uint8 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort16 SIMD_CFUNC simd_ushort_sat(simd_uint16 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort2 SIMD_CFUNC simd_ushort_sat(simd_ulong2 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort3 SIMD_CFUNC simd_ushort_sat(simd_ulong3 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort4 SIMD_CFUNC simd_ushort_sat(simd_ulong4 __x) { return simd_ushort(simd_min(__x, 0xffff)); } +static simd_ushort8 SIMD_CFUNC simd_ushort_sat(simd_ulong8 __x) { return simd_ushort(simd_min(__x, 0xffff)); } + + +static simd_int2 SIMD_CFUNC simd_int(simd_char2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_char3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_char4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_char8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_char16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_uchar2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_uchar3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_uchar4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 
SIMD_CFUNC simd_int(simd_uchar8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_uchar16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_short2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_short3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_short4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_short8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_short16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_ushort2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_ushort3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_ushort4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_ushort8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_ushort16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_int2 __x) { return __x; } +static simd_int3 SIMD_CFUNC simd_int(simd_int3 __x) { return __x; } +static simd_int4 SIMD_CFUNC simd_int(simd_int4 __x) { return __x; } +static simd_int8 SIMD_CFUNC simd_int(simd_int8 __x) { return __x; } +static simd_int16 SIMD_CFUNC simd_int(simd_int16 __x) { return __x; } +static simd_int2 SIMD_CFUNC simd_int(simd_uint2 __x) { return (simd_int2)__x; } +static simd_int3 SIMD_CFUNC simd_int(simd_uint3 __x) { return (simd_int3)__x; } +static simd_int4 SIMD_CFUNC simd_int(simd_uint4 __x) { return (simd_int4)__x; } +static simd_int8 SIMD_CFUNC simd_int(simd_uint8 __x) { return (simd_int8)__x; } +static simd_int16 SIMD_CFUNC simd_int(simd_uint16 __x) { return (simd_int16)__x; } +static simd_int2 SIMD_CFUNC simd_int(simd_float2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_float3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_float4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_float8 __x) { return __builtin_convertvector(__x, simd_int8); } +static simd_int16 SIMD_CFUNC simd_int(simd_float16 __x) { return __builtin_convertvector(__x, simd_int16); } +static simd_int2 SIMD_CFUNC simd_int(simd_long2 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int2); } +static simd_int3 SIMD_CFUNC simd_int(simd_long3 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_long4 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_long8 __x) { return __builtin_convertvector(__x & 0xffffffff, simd_int8); } +static simd_int2 SIMD_CFUNC simd_int(simd_ulong2 __x) { return simd_int(simd_long(__x)); } +static simd_int3 SIMD_CFUNC simd_int(simd_ulong3 __x) { return simd_int(simd_long(__x)); } +static simd_int4 SIMD_CFUNC simd_int(simd_ulong4 __x) { return simd_int(simd_long(__x)); } +static simd_int8 SIMD_CFUNC simd_int(simd_ulong8 __x) { return simd_int(simd_long(__x)); } +static simd_int2 SIMD_CFUNC simd_int(simd_double2 __x) { return __builtin_convertvector(__x, simd_int2); } +static simd_int3 SIMD_CFUNC 
simd_int(simd_double3 __x) { return __builtin_convertvector(__x, simd_int3); } +static simd_int4 SIMD_CFUNC simd_int(simd_double4 __x) { return __builtin_convertvector(__x, simd_int4); } +static simd_int8 SIMD_CFUNC simd_int(simd_double8 __x) { return __builtin_convertvector(__x, simd_int8); } + +static simd_int2 SIMD_CFUNC simd_int_sat(simd_char2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_char3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_char4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_char8 __x) { return simd_int(__x); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_char16 __x) { return simd_int(__x); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_short2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_short3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_short4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_short8 __x) { return simd_int(__x); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_short16 __x) { return simd_int(__x); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_int2 __x) { return __x; } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_int3 __x) { return __x; } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_int4 __x) { return __x; } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_int8 __x) { return __x; } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_int16 __x) { return __x; } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_float2 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_float3 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_float4 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_float8 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int16 SIMD_CFUNC simd_int_sat(simd_float16 __x) { return simd_bitselect(simd_int(simd_max(__x,-0x1.0p31f)), 0x7fffffff, __x >= 0x1.0p31f); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_long2 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_long3 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_long4 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_long8 __x) { return simd_int(simd_clamp(__x,-0x80000000LL,0x7fffffffLL)); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_double2 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_double3 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_double4 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int8 SIMD_CFUNC simd_int_sat(simd_double8 __x) { return simd_int(simd_clamp(__x,-0x1.0p31,0x1.fffffffcp30)); } +static simd_int2 SIMD_CFUNC simd_int_sat(simd_uchar2 __x) { return simd_int(__x); } +static simd_int3 SIMD_CFUNC simd_int_sat(simd_uchar3 __x) { return simd_int(__x); } +static simd_int4 SIMD_CFUNC simd_int_sat(simd_uchar4 __x) { return simd_int(__x); } +static simd_int8 SIMD_CFUNC 
simd_int_sat(simd_uchar8 __x) { return simd_int(__x); }
+static simd_int16 SIMD_CFUNC simd_int_sat(simd_uchar16 __x) { return simd_int(__x); }
+static simd_int2 SIMD_CFUNC simd_int_sat(simd_ushort2 __x) { return simd_int(__x); }
+static simd_int3 SIMD_CFUNC simd_int_sat(simd_ushort3 __x) { return simd_int(__x); }
+static simd_int4 SIMD_CFUNC simd_int_sat(simd_ushort4 __x) { return simd_int(__x); }
+static simd_int8 SIMD_CFUNC simd_int_sat(simd_ushort8 __x) { return simd_int(__x); }
+static simd_int16 SIMD_CFUNC simd_int_sat(simd_ushort16 __x) { return simd_int(__x); }
+static simd_int2 SIMD_CFUNC simd_int_sat(simd_uint2 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int3 SIMD_CFUNC simd_int_sat(simd_uint3 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int4 SIMD_CFUNC simd_int_sat(simd_uint4 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int8 SIMD_CFUNC simd_int_sat(simd_uint8 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int16 SIMD_CFUNC simd_int_sat(simd_uint16 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int2 SIMD_CFUNC simd_int_sat(simd_ulong2 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int3 SIMD_CFUNC simd_int_sat(simd_ulong3 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int4 SIMD_CFUNC simd_int_sat(simd_ulong4 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+static simd_int8 SIMD_CFUNC simd_int_sat(simd_ulong8 __x) { return simd_int(simd_min(__x,0x7fffffff)); }
+
+static simd_int2 SIMD_CFUNC simd_int_rte(simd_float2 __x) {
+#if defined __arm64__
+  return vcvtn_s32_f32(__x);
+#else
+  return simd_make_int2(simd_int_rte(simd_make_float4_undef(__x)));
+#endif
+}
+
+static simd_int3 SIMD_CFUNC simd_int_rte(simd_float3 __x) {
+  return simd_make_int3(simd_int_rte(simd_make_float4_undef(__x)));
+}
+
+static simd_int4 SIMD_CFUNC simd_int_rte(simd_float4 __x) {
+#if defined __SSE2__
+  return _mm_cvtps_epi32(__x);
+#elif defined __arm64__
+  return vcvtnq_s32_f32(__x);
+#else
+  simd_float4 magic = __tg_copysign(0x1.0p23, __x);
+  simd_int4 x_is_small = __tg_fabs(__x) < 0x1.0p23;
+  return __builtin_convertvector(simd_bitselect(__x, (__x + magic) - magic, x_is_small & 0x7fffffff), simd_int4);
+#endif
+}
+
+static simd_int8 SIMD_CFUNC simd_int_rte(simd_float8 __x) {
+#if defined __AVX__
+  return _mm256_cvtps_epi32(__x);
+#else
+  return simd_make_int8(simd_int_rte(__x.lo), simd_int_rte(__x.hi));
+#endif
+}
+
+static simd_int16 SIMD_CFUNC simd_int_rte(simd_float16 __x) {
+#if defined __AVX512F__
+  return _mm512_cvt_roundps_epi32(__x, _MM_FROUND_RINT);
+#else
+  return simd_make_int16(simd_int_rte(__x.lo), simd_int_rte(__x.hi));
+#endif
+}
+
+static simd_uint2 SIMD_CFUNC simd_uint(simd_char2 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint3 SIMD_CFUNC simd_uint(simd_char3 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint4 SIMD_CFUNC simd_uint(simd_char4 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint8 SIMD_CFUNC simd_uint(simd_char8 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint16 SIMD_CFUNC simd_uint(simd_char16 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint2 SIMD_CFUNC simd_uint(simd_uchar2 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint3 SIMD_CFUNC simd_uint(simd_uchar3 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint4 SIMD_CFUNC simd_uint(simd_uchar4 __x) { return simd_uint(simd_int(__x)); }
+static simd_uint8 SIMD_CFUNC simd_uint(simd_uchar8 __x) { return simd_uint(simd_int(__x)); }
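The generic fallback in simd_int_rte above is the classic magic-number rounding trick. A minimal scalar sketch of the same idea, for reference only (int_rte_scalar is an illustrative name, not part of this header):

#include <math.h>
#include <stdint.h>

/* Round float -> int32 to nearest, ties to even, without touching the FP
 * rounding mode: because float has a 24-bit significand, (x + 2^23) - 2^23
 * makes the hardware round x to an integer in the default
 * round-to-nearest-even mode. Values with |x| >= 2^23 are already integers
 * and must pass through unchanged, which is what the lane-wise bitselect on
 * x_is_small does in the vector code above. */
static int32_t int_rte_scalar(float x) {
    if (fabsf(x) < 0x1.0p23f) {
        float magic = copysignf(0x1.0p23f, x);
        x = (x + magic) - magic;
    }
    return (int32_t)x; /* x is integral now, so truncation is exact */
}

For example, int_rte_scalar(2.5f) == 2 and int_rte_scalar(3.5f) == 4, matching simd_int_rte lane for lane.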
+static simd_uint16 SIMD_CFUNC simd_uint(simd_uchar16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_short2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_short3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_short4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_short8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_short16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_ushort2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_ushort3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_ushort4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_ushort8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_ushort16 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_int2 __x) { return (simd_uint2)__x; } +static simd_uint3 SIMD_CFUNC simd_uint(simd_int3 __x) { return (simd_uint3)__x; } +static simd_uint4 SIMD_CFUNC simd_uint(simd_int4 __x) { return (simd_uint4)__x; } +static simd_uint8 SIMD_CFUNC simd_uint(simd_int8 __x) { return (simd_uint8)__x; } +static simd_uint16 SIMD_CFUNC simd_uint(simd_int16 __x) { return (simd_uint16)__x; } +static simd_uint2 SIMD_CFUNC simd_uint(simd_uint2 __x) { return __x; } +static simd_uint3 SIMD_CFUNC simd_uint(simd_uint3 __x) { return __x; } +static simd_uint4 SIMD_CFUNC simd_uint(simd_uint4 __x) { return __x; } +static simd_uint8 SIMD_CFUNC simd_uint(simd_uint8 __x) { return __x; } +static simd_uint16 SIMD_CFUNC simd_uint(simd_uint16 __x) { return __x; } +static simd_uint2 SIMD_CFUNC simd_uint(simd_float2 __x) { simd_int2 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float2)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint2)0,0x80000000,__big); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_float3 __x) { simd_int3 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float3)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint3)0,0x80000000,__big); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_float4 __x) { simd_int4 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float4)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint4)0,0x80000000,__big); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_float8 __x) { simd_int8 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float8)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint8)0,0x80000000,__big); } +static simd_uint16 SIMD_CFUNC simd_uint(simd_float16 __x) { simd_int16 __big = __x > 0x1.0p31f; return simd_uint(simd_int(__x - simd_bitselect((simd_float16)0,0x1.0p31f,__big))) + simd_bitselect((simd_uint16)0,0x80000000,__big); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_long2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_long3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_long4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_long8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_ulong2 __x) { return simd_uint(simd_int(__x)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_ulong3 __x) { return simd_uint(simd_int(__x)); } +static simd_uint4 SIMD_CFUNC 
simd_uint(simd_ulong4 __x) { return simd_uint(simd_int(__x)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_ulong8 __x) { return simd_uint(simd_int(__x)); } +static simd_uint2 SIMD_CFUNC simd_uint(simd_double2 __x) { simd_long2 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double2)0,0x1.0p31,__big))) + simd_bitselect((simd_uint2)0,0x80000000,simd_int(__big)); } +static simd_uint3 SIMD_CFUNC simd_uint(simd_double3 __x) { simd_long3 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double3)0,0x1.0p31,__big))) + simd_bitselect((simd_uint3)0,0x80000000,simd_int(__big)); } +static simd_uint4 SIMD_CFUNC simd_uint(simd_double4 __x) { simd_long4 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double4)0,0x1.0p31,__big))) + simd_bitselect((simd_uint4)0,0x80000000,simd_int(__big)); } +static simd_uint8 SIMD_CFUNC simd_uint(simd_double8 __x) { simd_long8 __big = __x > 0x1.fffffffcp30; return simd_uint(simd_int(__x - simd_bitselect((simd_double8)0,0x1.0p31,__big))) + simd_bitselect((simd_uint8)0,0x80000000,simd_int(__big)); } + +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_char2 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_char3 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_char4 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_char8 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_char16 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_short2 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_short3 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_short4 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_short8 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_short16 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_int2 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_int3 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_int4 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_int8 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_int16 __x) { return simd_uint(simd_max(__x,0)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_float2 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_float3 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_float4 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_float8 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_float16 __x) { return simd_bitselect(simd_uint(simd_max(__x,0)), 0xffffffff, __x >= 0x1.0p32f); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_long2 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_long3 __x) { return 
simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_long4 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_long8 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_double2 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_double3 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_double4 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_double8 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uchar2 __x) { return simd_uint(__x); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uchar3 __x) { return simd_uint(__x); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uchar4 __x) { return simd_uint(__x); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uchar8 __x) { return simd_uint(__x); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uchar16 __x) { return simd_uint(__x); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ushort2 __x) { return simd_uint(__x); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ushort3 __x) { return simd_uint(__x); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ushort4 __x) { return simd_uint(__x); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ushort8 __x) { return simd_uint(__x); } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_ushort16 __x) { return simd_uint(__x); } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_uint2 __x) { return __x; } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_uint3 __x) { return __x; } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_uint4 __x) { return __x; } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_uint8 __x) { return __x; } +static simd_uint16 SIMD_CFUNC simd_uint_sat(simd_uint16 __x) { return __x; } +static simd_uint2 SIMD_CFUNC simd_uint_sat(simd_ulong2 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint3 SIMD_CFUNC simd_uint_sat(simd_ulong3 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint4 SIMD_CFUNC simd_uint_sat(simd_ulong4 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } +static simd_uint8 SIMD_CFUNC simd_uint_sat(simd_ulong8 __x) { return simd_uint(simd_clamp(__x,0,0xffffffff)); } + + +static simd_float2 SIMD_CFUNC simd_float(simd_char2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_char3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_char4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_char8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_char16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_uchar2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_uchar3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_uchar4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_uchar8 __x) { return (simd_float8)(simd_int(__x) + 
0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_uchar16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_short2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_short3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_short4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_short8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_short16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_ushort2 __x) { return (simd_float2)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float3 SIMD_CFUNC simd_float(simd_ushort3 __x) { return (simd_float3)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float4 SIMD_CFUNC simd_float(simd_ushort4 __x) { return (simd_float4)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float8 SIMD_CFUNC simd_float(simd_ushort8 __x) { return (simd_float8)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float16 SIMD_CFUNC simd_float(simd_ushort16 __x) { return (simd_float16)(simd_int(__x) + 0x4b400000) - 0x1.8p23f; } +static simd_float2 SIMD_CFUNC simd_float(simd_int2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_int3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_int4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_int8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float16 SIMD_CFUNC simd_float(simd_int16 __x) { return __builtin_convertvector(__x,simd_float16); } +static simd_float2 SIMD_CFUNC simd_float(simd_uint2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_uint3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_uint4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_uint8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float16 SIMD_CFUNC simd_float(simd_uint16 __x) { return __builtin_convertvector(__x,simd_float16); } +static simd_float2 SIMD_CFUNC simd_float(simd_float2 __x) { return __x; } +static simd_float3 SIMD_CFUNC simd_float(simd_float3 __x) { return __x; } +static simd_float4 SIMD_CFUNC simd_float(simd_float4 __x) { return __x; } +static simd_float8 SIMD_CFUNC simd_float(simd_float8 __x) { return __x; } +static simd_float16 SIMD_CFUNC simd_float(simd_float16 __x) { return __x; } +static simd_float2 SIMD_CFUNC simd_float(simd_long2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_long3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_long4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_long8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float2 SIMD_CFUNC simd_float(simd_ulong2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_ulong3 __x) { return 
__builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_ulong4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_ulong8 __x) { return __builtin_convertvector(__x,simd_float8); } +static simd_float2 SIMD_CFUNC simd_float(simd_double2 __x) { return __builtin_convertvector(__x,simd_float2); } +static simd_float3 SIMD_CFUNC simd_float(simd_double3 __x) { return __builtin_convertvector(__x,simd_float3); } +static simd_float4 SIMD_CFUNC simd_float(simd_double4 __x) { return __builtin_convertvector(__x,simd_float4); } +static simd_float8 SIMD_CFUNC simd_float(simd_double8 __x) { return __builtin_convertvector(__x,simd_float8); } + + +static simd_long2 SIMD_CFUNC simd_long(simd_char2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_char3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_char4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_char8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_uchar2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_uchar3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_uchar4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_uchar8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_short2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_short3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_short4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_short8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_ushort2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_ushort3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_ushort4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_ushort8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_int2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_int3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_int4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_int8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_uint2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_uint3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_uint4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_uint8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_float2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_float3 __x) { return __builtin_convertvector(__x,simd_long3); } +static 
simd_long4 SIMD_CFUNC simd_long(simd_float4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_float8 __x) { return __builtin_convertvector(__x,simd_long8); } +static simd_long2 SIMD_CFUNC simd_long(simd_long2 __x) { return __x; } +static simd_long3 SIMD_CFUNC simd_long(simd_long3 __x) { return __x; } +static simd_long4 SIMD_CFUNC simd_long(simd_long4 __x) { return __x; } +static simd_long8 SIMD_CFUNC simd_long(simd_long8 __x) { return __x; } +static simd_long2 SIMD_CFUNC simd_long(simd_ulong2 __x) { return (simd_long2)__x; } +static simd_long3 SIMD_CFUNC simd_long(simd_ulong3 __x) { return (simd_long3)__x; } +static simd_long4 SIMD_CFUNC simd_long(simd_ulong4 __x) { return (simd_long4)__x; } +static simd_long8 SIMD_CFUNC simd_long(simd_ulong8 __x) { return (simd_long8)__x; } +static simd_long2 SIMD_CFUNC simd_long(simd_double2 __x) { return __builtin_convertvector(__x,simd_long2); } +static simd_long3 SIMD_CFUNC simd_long(simd_double3 __x) { return __builtin_convertvector(__x,simd_long3); } +static simd_long4 SIMD_CFUNC simd_long(simd_double4 __x) { return __builtin_convertvector(__x,simd_long4); } +static simd_long8 SIMD_CFUNC simd_long(simd_double8 __x) { return __builtin_convertvector(__x,simd_long8); } + +static simd_long2 SIMD_CFUNC simd_long_sat(simd_char2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_char3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_char4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_char8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_short2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_short3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_short4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_short8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_int2 __x) { return simd_long(__x); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_int3 __x) { return simd_long(__x); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_int4 __x) { return simd_long(__x); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_int8 __x) { return simd_long(__x); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_float2 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_float3 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_float4 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_float8 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63f)), 0x7fffffffffffffff, simd_long(__x >= 0x1.0p63f)); } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_long2 __x) { return __x; } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_long3 __x) { return __x; } +static simd_long4 SIMD_CFUNC simd_long_sat(simd_long4 __x) { return __x; } +static simd_long8 SIMD_CFUNC simd_long_sat(simd_long8 __x) { return __x; } +static simd_long2 SIMD_CFUNC simd_long_sat(simd_double2 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); } +static simd_long3 SIMD_CFUNC simd_long_sat(simd_double3 __x) { 
return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); }
+static simd_long4 SIMD_CFUNC simd_long_sat(simd_double4 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); }
+static simd_long8 SIMD_CFUNC simd_long_sat(simd_double8 __x) { return simd_bitselect(simd_long(simd_max(__x,-0x1.0p63)), 0x7fffffffffffffff, __x >= 0x1.0p63); }
+static simd_long2 SIMD_CFUNC simd_long_sat(simd_uchar2 __x) { return simd_long(__x); }
+static simd_long3 SIMD_CFUNC simd_long_sat(simd_uchar3 __x) { return simd_long(__x); }
+static simd_long4 SIMD_CFUNC simd_long_sat(simd_uchar4 __x) { return simd_long(__x); }
+static simd_long8 SIMD_CFUNC simd_long_sat(simd_uchar8 __x) { return simd_long(__x); }
+static simd_long2 SIMD_CFUNC simd_long_sat(simd_ushort2 __x) { return simd_long(__x); }
+static simd_long3 SIMD_CFUNC simd_long_sat(simd_ushort3 __x) { return simd_long(__x); }
+static simd_long4 SIMD_CFUNC simd_long_sat(simd_ushort4 __x) { return simd_long(__x); }
+static simd_long8 SIMD_CFUNC simd_long_sat(simd_ushort8 __x) { return simd_long(__x); }
+static simd_long2 SIMD_CFUNC simd_long_sat(simd_uint2 __x) { return simd_long(__x); }
+static simd_long3 SIMD_CFUNC simd_long_sat(simd_uint3 __x) { return simd_long(__x); }
+static simd_long4 SIMD_CFUNC simd_long_sat(simd_uint4 __x) { return simd_long(__x); }
+static simd_long8 SIMD_CFUNC simd_long_sat(simd_uint8 __x) { return simd_long(__x); }
+static simd_long2 SIMD_CFUNC simd_long_sat(simd_ulong2 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); }
+static simd_long3 SIMD_CFUNC simd_long_sat(simd_ulong3 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); }
+static simd_long4 SIMD_CFUNC simd_long_sat(simd_ulong4 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); }
+static simd_long8 SIMD_CFUNC simd_long_sat(simd_ulong8 __x) { return simd_long(simd_min(__x,0x7fffffffffffffff)); }
+
+static simd_long2 SIMD_CFUNC simd_long_rte(simd_double2 __x) {
+#if defined __AVX512F__
+  return _mm_cvtpd_epi64(__x);
+#elif defined __arm64__
+  return vcvtnq_s64_f64(__x);
+#else
+  simd_double2 magic = __tg_copysign(0x1.0p52, __x);
+  simd_long2 x_is_small = __tg_fabs(__x) < 0x1.0p52;
+  return __builtin_convertvector(simd_bitselect(__x, (__x + magic) - magic, x_is_small & 0x7fffffffffffffff), simd_long2);
+#endif
+}
+
+static simd_long3 SIMD_CFUNC simd_long_rte(simd_double3 __x) {
+  return simd_make_long3(simd_long_rte(simd_make_double4_undef(__x)));
+}
+
+static simd_long4 SIMD_CFUNC simd_long_rte(simd_double4 __x) {
+#if defined __AVX512F__
+  return _mm256_cvtpd_epi64(__x);
+#else
+  return simd_make_long4(simd_long_rte(__x.lo), simd_long_rte(__x.hi));
+#endif
+}
+
+static simd_long8 SIMD_CFUNC simd_long_rte(simd_double8 __x) {
+#if defined __AVX512F__
+  return _mm512_cvt_roundpd_epi64(__x, _MM_FROUND_RINT);
+#else
+  return simd_make_long8(simd_long_rte(__x.lo), simd_long_rte(__x.hi));
+#endif
+}
+
+
+static simd_ulong2 SIMD_CFUNC simd_ulong(simd_char2 __x) { return simd_ulong(simd_long(__x)); }
+static simd_ulong3 SIMD_CFUNC simd_ulong(simd_char3 __x) { return simd_ulong(simd_long(__x)); }
+static simd_ulong4 SIMD_CFUNC simd_ulong(simd_char4 __x) { return simd_ulong(simd_long(__x)); }
+static simd_ulong8 SIMD_CFUNC simd_ulong(simd_char8 __x) { return simd_ulong(simd_long(__x)); }
+static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uchar2 __x) { return simd_ulong(simd_long(__x)); }
+static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uchar3 __x) { return simd_ulong(simd_long(__x)); }
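For reference, this is what the saturating conversions buy over the plain ones, as a small usage sketch (sat_demo is an illustrative name; assumes the <simd/simd.h> umbrella header that ships these overloads):

#include <simd/simd.h>

/* simd_long truncates, and like any C float-to-integer conversion it is
 * undefined for out-of-range input; simd_long_sat instead pins each
 * out-of-range lane to the destination type's minimum or maximum. */
static void sat_demo(void) {
    simd_double2 v = simd_make_double2(1.0e20, -1.0e20);
    simd_long2 pinned = simd_long_sat(v); /* { INT64_MAX, INT64_MIN } */
    (void)pinned;
}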
+static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uchar4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uchar8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_short2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_short3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_short4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_short8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ushort2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ushort3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ushort4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ushort8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_int2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_int3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_int4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_int8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_uint2 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_uint3 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_uint4 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_uint8 __x) { return simd_ulong(simd_long(__x)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_float2 __x) { simd_int2 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float2)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong2)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_float3 __x) { simd_int3 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float3)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong3)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_float4 __x) { simd_int4 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float4)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong4)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_float8 __x) { simd_int8 __big = __x >= 0x1.0p63f; return simd_ulong(simd_long(__x - simd_bitselect((simd_float8)0,0x1.0p63f,__big))) + simd_bitselect((simd_ulong8)0,0x8000000000000000,simd_long(__big)); } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_long2 __x) { return (simd_ulong2)__x; } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_long3 __x) { return (simd_ulong3)__x; } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_long4 __x) { return (simd_ulong4)__x; } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_long8 __x) { return (simd_ulong8)__x; } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_ulong2 __x) { return __x; } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_ulong3 __x) { return __x; } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_ulong4 __x) { return __x; } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_ulong8 __x) { return __x; } +static simd_ulong2 SIMD_CFUNC simd_ulong(simd_double2 __x) { simd_long2 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - 
simd_bitselect((simd_double2)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong2)0,0x8000000000000000,__big); } +static simd_ulong3 SIMD_CFUNC simd_ulong(simd_double3 __x) { simd_long3 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double3)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong3)0,0x8000000000000000,__big); } +static simd_ulong4 SIMD_CFUNC simd_ulong(simd_double4 __x) { simd_long4 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double4)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong4)0,0x8000000000000000,__big); } +static simd_ulong8 SIMD_CFUNC simd_ulong(simd_double8 __x) { simd_long8 __big = __x >= 0x1.0p63; return simd_ulong(simd_long(__x - simd_bitselect((simd_double8)0,0x1.0p63,__big))) + simd_bitselect((simd_ulong8)0,0x8000000000000000,__big); } + +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_char2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_char3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_char4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_char8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_short2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_short3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_short4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_short8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_int2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_int3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_int4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_int8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_float2 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_float3 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_float4 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_float8 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.f)), 0xffffffffffffffff, simd_long(__x >= 0x1.0p64f)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_long2 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_long3 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_long4 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_long8 __x) { return simd_ulong(simd_max(__x,0)); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_double2 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_double3 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_double4 __x) { return 
simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_double8 __x) { return simd_bitselect(simd_ulong(simd_max(__x,0.0)), 0xffffffffffffffff, __x >= 0x1.0p64); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uchar2 __x) { return simd_ulong(__x); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uchar3 __x) { return simd_ulong(__x); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uchar4 __x) { return simd_ulong(__x); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uchar8 __x) { return simd_ulong(__x); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ushort2 __x) { return simd_ulong(__x); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ushort3 __x) { return simd_ulong(__x); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ushort4 __x) { return simd_ulong(__x); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ushort8 __x) { return simd_ulong(__x); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_uint2 __x) { return simd_ulong(__x); } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_uint3 __x) { return simd_ulong(__x); } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_uint4 __x) { return simd_ulong(__x); } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_uint8 __x) { return simd_ulong(__x); } +static simd_ulong2 SIMD_CFUNC simd_ulong_sat(simd_ulong2 __x) { return __x; } +static simd_ulong3 SIMD_CFUNC simd_ulong_sat(simd_ulong3 __x) { return __x; } +static simd_ulong4 SIMD_CFUNC simd_ulong_sat(simd_ulong4 __x) { return __x; } +static simd_ulong8 SIMD_CFUNC simd_ulong_sat(simd_ulong8 __x) { return __x; } + + +static simd_double2 SIMD_CFUNC simd_double(simd_char2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_char3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_char4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_char8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_uchar2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_uchar3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_uchar4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_uchar8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_short2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_short3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_short4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_short8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_ushort2 __x) { return simd_double(simd_int(__x)); } +static simd_double3 SIMD_CFUNC simd_double(simd_ushort3 __x) { return simd_double(simd_int(__x)); } +static simd_double4 SIMD_CFUNC simd_double(simd_ushort4 __x) { return simd_double(simd_int(__x)); } +static simd_double8 SIMD_CFUNC simd_double(simd_ushort8 __x) { return simd_double(simd_int(__x)); } +static simd_double2 SIMD_CFUNC simd_double(simd_int2 __x) { return __builtin_convertvector(__x, simd_double2); } +static simd_double3 SIMD_CFUNC simd_double(simd_int3 __x) { return __builtin_convertvector(__x, simd_double3); } +static simd_double4 SIMD_CFUNC simd_double(simd_int4 __x) { return 
__builtin_convertvector(__x, simd_double4); }
+static simd_double8 SIMD_CFUNC simd_double(simd_int8 __x) { return __builtin_convertvector(__x, simd_double8); }
+static simd_double2 SIMD_CFUNC simd_double(simd_uint2 __x) { return __builtin_convertvector(__x, simd_double2); }
+static simd_double3 SIMD_CFUNC simd_double(simd_uint3 __x) { return __builtin_convertvector(__x, simd_double3); }
+static simd_double4 SIMD_CFUNC simd_double(simd_uint4 __x) { return __builtin_convertvector(__x, simd_double4); }
+static simd_double8 SIMD_CFUNC simd_double(simd_uint8 __x) { return __builtin_convertvector(__x, simd_double8); }
+static simd_double2 SIMD_CFUNC simd_double(simd_float2 __x) { return __builtin_convertvector(__x, simd_double2); }
+static simd_double3 SIMD_CFUNC simd_double(simd_float3 __x) { return __builtin_convertvector(__x, simd_double3); }
+static simd_double4 SIMD_CFUNC simd_double(simd_float4 __x) { return __builtin_convertvector(__x, simd_double4); }
+static simd_double8 SIMD_CFUNC simd_double(simd_float8 __x) { return __builtin_convertvector(__x, simd_double8); }
+static simd_double2 SIMD_CFUNC simd_double(simd_long2 __x) { return __builtin_convertvector(__x, simd_double2); }
+static simd_double3 SIMD_CFUNC simd_double(simd_long3 __x) { return __builtin_convertvector(__x, simd_double3); }
+static simd_double4 SIMD_CFUNC simd_double(simd_long4 __x) { return __builtin_convertvector(__x, simd_double4); }
+static simd_double8 SIMD_CFUNC simd_double(simd_long8 __x) { return __builtin_convertvector(__x, simd_double8); }
+static simd_double2 SIMD_CFUNC simd_double(simd_ulong2 __x) { return __builtin_convertvector(__x, simd_double2); }
+static simd_double3 SIMD_CFUNC simd_double(simd_ulong3 __x) { return __builtin_convertvector(__x, simd_double3); }
+static simd_double4 SIMD_CFUNC simd_double(simd_ulong4 __x) { return __builtin_convertvector(__x, simd_double4); }
+static simd_double8 SIMD_CFUNC simd_double(simd_ulong8 __x) { return __builtin_convertvector(__x, simd_double8); }
+static simd_double2 SIMD_CFUNC simd_double(simd_double2 __x) { return __builtin_convertvector(__x, simd_double2); }
+static simd_double3 SIMD_CFUNC simd_double(simd_double3 __x) { return __builtin_convertvector(__x, simd_double3); }
+static simd_double4 SIMD_CFUNC simd_double(simd_double4 __x) { return __builtin_convertvector(__x, simd_double4); }
+static simd_double8 SIMD_CFUNC simd_double(simd_double8 __x) { return __builtin_convertvector(__x, simd_double8); }
+
+
+#ifdef __cplusplus
+} // extern "C"
+
+namespace simd {
+
+#if __has_feature(cxx_constexpr)
+/*! @abstract Convert a vector to another vector of the ScalarType and the same number of elements. */
+template <typename ScalarType, typename typeN>
+static constexpr Vector_t<ScalarType, traits<typeN>::count> convert(typeN vector)
+{
+  if constexpr (traits<typeN>::count == 1)
+    return static_cast<Vector_t<ScalarType, traits<typeN>::count>>(vector);
+  else if constexpr (std::is_same<ScalarType, char>::value)
+    return simd_char(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned char>::value)
+    return simd_uchar(vector);
+  else if constexpr (std::is_same<ScalarType, short>::value)
+    return simd_short(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned short>::value)
+    return simd_ushort(vector);
+  else if constexpr (std::is_same<ScalarType, int>::value)
+    return simd_int(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned int>::value)
+    return simd_uint(vector);
+  else if constexpr (std::is_same<ScalarType, long>::value)
+    return simd_long(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned long>::value)
+    return simd_ulong(vector);
+  else if constexpr (std::is_same<ScalarType, float>::value)
+    return simd_float(vector);
+  else if constexpr (std::is_same<ScalarType, double>::value)
+    return simd_double(vector);
+}
+
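+/* Illustrative sketch (values hypothetical; assumes C++17 and
+ * <simd/simd.h>): simd::convert maps a vector to the same-length vector
+ * of the requested scalar type, dispatching to the C functions above.
+ *
+ *   simd_float4 f = {1.5f, -2.5f, 3.5f, 4.5f};
+ *   simd_int4 i = simd::convert<int>(f);        // {1, -2, 3, 4}, truncated
+ *   simd_double4 d = simd::convert<double>(i);
+ */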
+/*! @abstract Convert a vector to another vector of the ScalarType and the same number of elements with saturation.
+ * @discussion When the input value is too large to be represented in the return type, the input value
+ * will be saturated to the maximum value of the return type. */
+template <typename ScalarType, typename typeN>
+static constexpr Vector_t<ScalarType, traits<typeN>::count> convert_sat(typeN vector)
+{
+  static_assert(traits<typeN>::count != 1);
+  if constexpr (std::is_same<ScalarType, char>::value)
+    return simd_char_sat(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned char>::value)
+    return simd_uchar_sat(vector);
+  else if constexpr (std::is_same<ScalarType, short>::value)
+    return simd_short_sat(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned short>::value)
+    return simd_ushort_sat(vector);
+  else if constexpr (std::is_same<ScalarType, int>::value)
+    return simd_int_sat(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned int>::value)
+    return simd_uint_sat(vector);
+  else if constexpr (std::is_same<ScalarType, long>::value)
+    return simd_long_sat(vector);
+  else if constexpr (std::is_same<ScalarType, unsigned long>::value)
+    return simd_ulong_sat(vector);
+  else
+    return convert<ScalarType>(vector);
+}
+#endif /* __has_feature(cxx_constexpr) */
+
+} /* namespace simd */
+#endif // __cplusplus
+#endif // SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#endif // __SIMD_CONVERSION_HEADER__
+
diff --git a/vfsoverlay/extern.h b/vfsoverlay/extern.h
new file mode 100644
index 00000000..b4b6b8f5
--- /dev/null
+++ b/vfsoverlay/extern.h
@@ -0,0 +1,49 @@
+/* Copyright (c) 2014 Apple, Inc. All rights reserved. */
+
+#ifndef __SIMD_EXTERN_HEADER__
+#define __SIMD_EXTERN_HEADER__
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/types.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#pragma mark - geometry
+#if SIMD_LIBRARY_VERSION >= 2
+extern float _simd_orient_vf2(simd_float2, simd_float2);
+extern float _simd_orient_pf2(simd_float2, simd_float2, simd_float2);
+extern float _simd_incircle_pf2(simd_float2, simd_float2, simd_float2, simd_float2);
+
+extern float _simd_orient_vf3(simd_float3, simd_float3, simd_float3);
+extern float _simd_orient_pf3(simd_float3, simd_float3, simd_float3, simd_float3);
+extern float _simd_insphere_pf3(simd_float3, simd_float3, simd_float3, simd_float3, simd_float3);
+
+extern double _simd_orient_vd2(simd_double2, simd_double2);
+extern double _simd_orient_pd2(simd_double2, simd_double2, simd_double2);
+extern double _simd_incircle_pd2(simd_double2, simd_double2, simd_double2, simd_double2);
+
+/* The double3 variants of these functions take their arguments in a buffer
+ * to work around the fact that double3 calling conventions are different
+ * depending on whether or not the executable has been compiled with AVX
+ * enabled. */
+extern double _simd_orient_vd3(const double *);
+extern double _simd_orient_pd3(const double *);
+extern double _simd_insphere_pd3(const double *);
+#endif /* SIMD_LIBRARY_VERSION */
+
+#pragma mark - matrix
+extern simd_float2x2 __invert_f2(simd_float2x2);
+extern simd_double2x2 __invert_d2(simd_double2x2);
+extern simd_float3x3 __invert_f3(simd_float3x3);
+extern simd_double3x3 __invert_d3(simd_double3x3);
+extern simd_float4x4 __invert_f4(simd_float4x4);
+extern simd_double4x4 __invert_d4(simd_double4x4);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif /* __SIMD_EXTERN_HEADER__ */
diff --git a/vfsoverlay/geometry.h b/vfsoverlay/geometry.h
new file mode 100644
index 00000000..83b5a380
--- /dev/null
+++ b/vfsoverlay/geometry.h
@@ -0,0 +1,1100 @@
+/* Copyright (c) 2014-2017 Apple, Inc. All rights reserved.
+ * + * The interfaces declared in this header provide operations for mathematical + * vectors; these functions and macros operate on vectors of floating-point + * data only. + * + * Function Result + * ------------------------------------------------------------------ + * simd_dot(x,y) The dot product of x and y. + * + * simd_project(x,y) x projected onto y. There are two variants + * of this function, simd_precise_project + * and simd_fast_project. simd_project + * is equivalent to simd_precise_project + * unless you are compiling with -ffast-math + * specified, in which case it is equivalent + * to simd_fast_project. + * + * simd_length(x) The length (two-norm) of x. Undefined if + * x is poorly scaled such that an + * intermediate computation overflows or + * underflows. There are two variants + * of this function, simd_precise_length + * and simd_fast_length. simd_length + * is equivalent to simd_precise_length + * unless you are compiling with -ffast-math + * specified, in which case it is equivalent + * to simd_fast_length. + * + * simd_length_squared(x) The square of the length of x. If you + * simply need to compare relative magnitudes, + * use this instead of simd_length; it is + * faster than simd_fast_length and as + * accurate as simd_precise_length. + * + * simd_norm_one(x) The one-norm (sum of absolute values) of x. + * + * simd_norm_inf(x) The inf-norm (max absolute value) of x. + * + * simd_distance(x,y) The distance between x and y. Undefined if + * x and y are poorly scaled such that an + * intermediate computation overflows + * or underflows. There are two variants + * of this function, simd_precise_distance + * and simd_fast_distance. simd_distance + * is equivalent to simd_precise_distance + * unless you are compiling with -ffast-math + * specified, in which case it is equivalent + * to simd_fast_distance. + * + * simd_distance_squared(x,y) The square of the distance between x and y. + * + * simd_normalize(x) A vector pointing in the direction of x + * with length 1.0. Undefined if x is + * the zero vector, or if x is poorly scaled + * such that an intermediate computation + * overflows or underflows. There are two + * variants of this function, + * simd_precise_normalize and + * simd_fast_normalize. simd_normalize + * is equivalent to simd_precise_normalize + * unless you are compiling with -ffast-math + * specified, in which case it is equivalent + * to simd_fast_normalize. + * + * simd_cross(x,y) If x and y are vectors of dimension 3, + * the cross-product of x and y. + * + * If x and y are vectors of dimension 2, + * the cross-product of x and y interpreted as + * vectors in the z == 0 plane of a three- + * dimensional space. + * + * If x and y are vectors with a length that + * is neither 2 nor 3, this operation is not + * available. + * + * simd_reflect(x,n) Reflects x through the plane perpendicular + * to the normal vector n. Only available + * for vectors of length 2, 3, or 4. + * + * simd_refract(x,n,eta) Calculates the refraction direction given + * unit incident vector x, unit normal vector + * n, and index of refraction eta. If the + * angle between the incident vector and the + * surface normal is too great for the + * specified index of refraction, zero is + * returned. + * Available for vectors of length 2, 3, or 4. + * + * simd_orient(x,y,...) Return a positive value if the origin and + * their ordered arguments determine a positively + * oriented parallelepiped, zero if it is degenerate, + * and a negative value if it is negatively oriented. 
+ *
+ * In C++ the following geometric functions are available in the simd::
+ * namespace:
+ *
+ *      C++ Function                    Equivalent C Function
+ *      -----------------------------------------------------------
+ *      simd::dot(x,y)                  simd_dot(x,y)
+ *      simd::project(x,y)              simd_project(x,y)
+ *      simd::length_squared(x)         simd_length_squared(x)
+ *      simd::length(x)                 simd_length(x)
+ *      simd::distance_squared(x,y)     simd_distance_squared(x,y)
+ *      simd::norm_one(x)               simd_norm_one(x)
+ *      simd::norm_inf(x)               simd_norm_inf(x)
+ *      simd::distance(x,y)             simd_distance(x,y)
+ *      simd::normalize(x)              simd_normalize(x)
+ *      simd::cross(x,y)                simd_cross(x,y)
+ *      simd::reflect(x,n)              simd_reflect(x,n)
+ *      simd::refract(x,n,eta)          simd_refract(x,n,eta)
+ *      simd::orient(x,y,...)           simd_orient(x,y,...)
+ *
+ *      simd::precise::project(x,y)     simd_precise_project(x,y)
+ *      simd::precise::length(x)        simd_precise_length(x)
+ *      simd::precise::distance(x,y)    simd_precise_distance(x,y)
+ *      simd::precise::normalize(x)     simd_precise_normalize(x)
+ *
+ *      simd::fast::project(x,y)        simd_fast_project(x,y)
+ *      simd::fast::length(x)           simd_fast_length(x)
+ *      simd::fast::distance(x,y)       simd_fast_distance(x,y)
+ *      simd::fast::normalize(x)        simd_fast_normalize(x)
+ */
+
+#ifndef __SIMD_GEOMETRY_HEADER__
+#define __SIMD_GEOMETRY_HEADER__
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/types.h>
+#include <simd/common.h>
+#include <simd/extern.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static float SIMD_CFUNC simd_dot(simd_float2 __x, simd_float2 __y);
+static float SIMD_CFUNC simd_dot(simd_float3 __x, simd_float3 __y);
+static float SIMD_CFUNC simd_dot(simd_float4 __x, simd_float4 __y);
+static float SIMD_CFUNC simd_dot(simd_float8 __x, simd_float8 __y);
+static float SIMD_CFUNC simd_dot(simd_float16 __x, simd_float16 __y);
+static double SIMD_CFUNC simd_dot(simd_double2 __x, simd_double2 __y);
+static double SIMD_CFUNC simd_dot(simd_double3 __x, simd_double3 __y);
+static double SIMD_CFUNC simd_dot(simd_double4 __x, simd_double4 __y);
+static double SIMD_CFUNC simd_dot(simd_double8 __x, simd_double8 __y);
+#define vector_dot simd_dot
+
+static simd_float2 SIMD_CFUNC simd_precise_project(simd_float2 __x, simd_float2 __y);
+static simd_float3 SIMD_CFUNC simd_precise_project(simd_float3 __x, simd_float3 __y);
+static simd_float4 SIMD_CFUNC simd_precise_project(simd_float4 __x, simd_float4 __y);
+static simd_float8 SIMD_CFUNC simd_precise_project(simd_float8 __x, simd_float8 __y);
+static simd_float16 SIMD_CFUNC simd_precise_project(simd_float16 __x, simd_float16 __y);
+static simd_double2 SIMD_CFUNC simd_precise_project(simd_double2 __x, simd_double2 __y);
+static simd_double3 SIMD_CFUNC simd_precise_project(simd_double3 __x, simd_double3 __y);
+static simd_double4 SIMD_CFUNC simd_precise_project(simd_double4 __x, simd_double4 __y);
+static simd_double8 SIMD_CFUNC simd_precise_project(simd_double8 __x, simd_double8 __y);
+#define vector_precise_project simd_precise_project
+
+static simd_float2 SIMD_CFUNC simd_fast_project(simd_float2 __x, simd_float2 __y);
+static simd_float3 SIMD_CFUNC simd_fast_project(simd_float3 __x, simd_float3 __y);
+static simd_float4 SIMD_CFUNC simd_fast_project(simd_float4 __x, simd_float4 __y);
+static simd_float8 SIMD_CFUNC simd_fast_project(simd_float8 __x, simd_float8 __y);
+static simd_float16 SIMD_CFUNC simd_fast_project(simd_float16 __x, simd_float16 __y);
+static simd_double2 SIMD_CFUNC simd_fast_project(simd_double2 __x, simd_double2 __y);
+static simd_double3 SIMD_CFUNC simd_fast_project(simd_double3 __x, simd_double3 __y);
+static simd_double4 SIMD_CFUNC
simd_fast_project(simd_double4 __x, simd_double4 __y); +static simd_double8 SIMD_CFUNC simd_fast_project(simd_double8 __x, simd_double8 __y); +#define vector_fast_project simd_fast_project + +static simd_float2 SIMD_CFUNC simd_project(simd_float2 __x, simd_float2 __y); +static simd_float3 SIMD_CFUNC simd_project(simd_float3 __x, simd_float3 __y); +static simd_float4 SIMD_CFUNC simd_project(simd_float4 __x, simd_float4 __y); +static simd_float8 SIMD_CFUNC simd_project(simd_float8 __x, simd_float8 __y); +static simd_float16 SIMD_CFUNC simd_project(simd_float16 __x, simd_float16 __y); +static simd_double2 SIMD_CFUNC simd_project(simd_double2 __x, simd_double2 __y); +static simd_double3 SIMD_CFUNC simd_project(simd_double3 __x, simd_double3 __y); +static simd_double4 SIMD_CFUNC simd_project(simd_double4 __x, simd_double4 __y); +static simd_double8 SIMD_CFUNC simd_project(simd_double8 __x, simd_double8 __y); +#define vector_project simd_project + +static float SIMD_CFUNC simd_precise_length(simd_float2 __x); +static float SIMD_CFUNC simd_precise_length(simd_float3 __x); +static float SIMD_CFUNC simd_precise_length(simd_float4 __x); +static float SIMD_CFUNC simd_precise_length(simd_float8 __x); +static float SIMD_CFUNC simd_precise_length(simd_float16 __x); +static double SIMD_CFUNC simd_precise_length(simd_double2 __x); +static double SIMD_CFUNC simd_precise_length(simd_double3 __x); +static double SIMD_CFUNC simd_precise_length(simd_double4 __x); +static double SIMD_CFUNC simd_precise_length(simd_double8 __x); +#define vector_precise_length simd_precise_length + +static float SIMD_CFUNC simd_fast_length(simd_float2 __x); +static float SIMD_CFUNC simd_fast_length(simd_float3 __x); +static float SIMD_CFUNC simd_fast_length(simd_float4 __x); +static float SIMD_CFUNC simd_fast_length(simd_float8 __x); +static float SIMD_CFUNC simd_fast_length(simd_float16 __x); +static double SIMD_CFUNC simd_fast_length(simd_double2 __x); +static double SIMD_CFUNC simd_fast_length(simd_double3 __x); +static double SIMD_CFUNC simd_fast_length(simd_double4 __x); +static double SIMD_CFUNC simd_fast_length(simd_double8 __x); +#define vector_fast_length simd_fast_length + +static float SIMD_CFUNC simd_length(simd_float2 __x); +static float SIMD_CFUNC simd_length(simd_float3 __x); +static float SIMD_CFUNC simd_length(simd_float4 __x); +static float SIMD_CFUNC simd_length(simd_float8 __x); +static float SIMD_CFUNC simd_length(simd_float16 __x); +static double SIMD_CFUNC simd_length(simd_double2 __x); +static double SIMD_CFUNC simd_length(simd_double3 __x); +static double SIMD_CFUNC simd_length(simd_double4 __x); +static double SIMD_CFUNC simd_length(simd_double8 __x); +#define vector_length simd_length + +static float SIMD_CFUNC simd_length_squared(simd_float2 __x); +static float SIMD_CFUNC simd_length_squared(simd_float3 __x); +static float SIMD_CFUNC simd_length_squared(simd_float4 __x); +static float SIMD_CFUNC simd_length_squared(simd_float8 __x); +static float SIMD_CFUNC simd_length_squared(simd_float16 __x); +static double SIMD_CFUNC simd_length_squared(simd_double2 __x); +static double SIMD_CFUNC simd_length_squared(simd_double3 __x); +static double SIMD_CFUNC simd_length_squared(simd_double4 __x); +static double SIMD_CFUNC simd_length_squared(simd_double8 __x); +#define vector_length_squared simd_length_squared + +static float SIMD_CFUNC simd_norm_one(simd_float2 __x); +static float SIMD_CFUNC simd_norm_one(simd_float3 __x); +static float SIMD_CFUNC simd_norm_one(simd_float4 __x); +static float SIMD_CFUNC 
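+/* Illustrative sketch (values hypothetical): when only relative magnitudes
+ * matter, compare squared lengths and skip the square root, as the notes
+ * above recommend.
+ *
+ *   simd_float3 a = {1, 2, 2};    // length 3, length_squared 9
+ *   simd_float3 b = {0, 3, 4};    // length 5, length_squared 25
+ *   if (simd_length_squared(a) < simd_length_squared(b)) { ... }
+ */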
simd_norm_one(simd_float8 __x); +static float SIMD_CFUNC simd_norm_one(simd_float16 __x); +static double SIMD_CFUNC simd_norm_one(simd_double2 __x); +static double SIMD_CFUNC simd_norm_one(simd_double3 __x); +static double SIMD_CFUNC simd_norm_one(simd_double4 __x); +static double SIMD_CFUNC simd_norm_one(simd_double8 __x); +#define vector_norm_one simd_norm_one + +static float SIMD_CFUNC simd_norm_inf(simd_float2 __x); +static float SIMD_CFUNC simd_norm_inf(simd_float3 __x); +static float SIMD_CFUNC simd_norm_inf(simd_float4 __x); +static float SIMD_CFUNC simd_norm_inf(simd_float8 __x); +static float SIMD_CFUNC simd_norm_inf(simd_float16 __x); +static double SIMD_CFUNC simd_norm_inf(simd_double2 __x); +static double SIMD_CFUNC simd_norm_inf(simd_double3 __x); +static double SIMD_CFUNC simd_norm_inf(simd_double4 __x); +static double SIMD_CFUNC simd_norm_inf(simd_double8 __x); +#define vector_norm_inf simd_norm_inf + +static float SIMD_CFUNC simd_precise_distance(simd_float2 __x, simd_float2 __y); +static float SIMD_CFUNC simd_precise_distance(simd_float3 __x, simd_float3 __y); +static float SIMD_CFUNC simd_precise_distance(simd_float4 __x, simd_float4 __y); +static float SIMD_CFUNC simd_precise_distance(simd_float8 __x, simd_float8 __y); +static float SIMD_CFUNC simd_precise_distance(simd_float16 __x, simd_float16 __y); +static double SIMD_CFUNC simd_precise_distance(simd_double2 __x, simd_double2 __y); +static double SIMD_CFUNC simd_precise_distance(simd_double3 __x, simd_double3 __y); +static double SIMD_CFUNC simd_precise_distance(simd_double4 __x, simd_double4 __y); +static double SIMD_CFUNC simd_precise_distance(simd_double8 __x, simd_double8 __y); +#define vector_precise_distance simd_precise_distance + +static float SIMD_CFUNC simd_fast_distance(simd_float2 __x, simd_float2 __y); +static float SIMD_CFUNC simd_fast_distance(simd_float3 __x, simd_float3 __y); +static float SIMD_CFUNC simd_fast_distance(simd_float4 __x, simd_float4 __y); +static float SIMD_CFUNC simd_fast_distance(simd_float8 __x, simd_float8 __y); +static float SIMD_CFUNC simd_fast_distance(simd_float16 __x, simd_float16 __y); +static double SIMD_CFUNC simd_fast_distance(simd_double2 __x, simd_double2 __y); +static double SIMD_CFUNC simd_fast_distance(simd_double3 __x, simd_double3 __y); +static double SIMD_CFUNC simd_fast_distance(simd_double4 __x, simd_double4 __y); +static double SIMD_CFUNC simd_fast_distance(simd_double8 __x, simd_double8 __y); +#define vector_fast_distance simd_fast_distance + +static float SIMD_CFUNC simd_distance(simd_float2 __x, simd_float2 __y); +static float SIMD_CFUNC simd_distance(simd_float3 __x, simd_float3 __y); +static float SIMD_CFUNC simd_distance(simd_float4 __x, simd_float4 __y); +static float SIMD_CFUNC simd_distance(simd_float8 __x, simd_float8 __y); +static float SIMD_CFUNC simd_distance(simd_float16 __x, simd_float16 __y); +static double SIMD_CFUNC simd_distance(simd_double2 __x, simd_double2 __y); +static double SIMD_CFUNC simd_distance(simd_double3 __x, simd_double3 __y); +static double SIMD_CFUNC simd_distance(simd_double4 __x, simd_double4 __y); +static double SIMD_CFUNC simd_distance(simd_double8 __x, simd_double8 __y); +#define vector_distance simd_distance + +static float SIMD_CFUNC simd_distance_squared(simd_float2 __x, simd_float2 __y); +static float SIMD_CFUNC simd_distance_squared(simd_float3 __x, simd_float3 __y); +static float SIMD_CFUNC simd_distance_squared(simd_float4 __x, simd_float4 __y); +static float SIMD_CFUNC simd_distance_squared(simd_float8 __x, 
simd_float8 __y); +static float SIMD_CFUNC simd_distance_squared(simd_float16 __x, simd_float16 __y); +static double SIMD_CFUNC simd_distance_squared(simd_double2 __x, simd_double2 __y); +static double SIMD_CFUNC simd_distance_squared(simd_double3 __x, simd_double3 __y); +static double SIMD_CFUNC simd_distance_squared(simd_double4 __x, simd_double4 __y); +static double SIMD_CFUNC simd_distance_squared(simd_double8 __x, simd_double8 __y); +#define vector_distance_squared simd_distance_squared + +static simd_float2 SIMD_CFUNC simd_precise_normalize(simd_float2 __x); +static simd_float3 SIMD_CFUNC simd_precise_normalize(simd_float3 __x); +static simd_float4 SIMD_CFUNC simd_precise_normalize(simd_float4 __x); +static simd_float8 SIMD_CFUNC simd_precise_normalize(simd_float8 __x); +static simd_float16 SIMD_CFUNC simd_precise_normalize(simd_float16 __x); +static simd_double2 SIMD_CFUNC simd_precise_normalize(simd_double2 __x); +static simd_double3 SIMD_CFUNC simd_precise_normalize(simd_double3 __x); +static simd_double4 SIMD_CFUNC simd_precise_normalize(simd_double4 __x); +static simd_double8 SIMD_CFUNC simd_precise_normalize(simd_double8 __x); +#define vector_precise_normalize simd_precise_normalize + +static simd_float2 SIMD_CFUNC simd_fast_normalize(simd_float2 __x); +static simd_float3 SIMD_CFUNC simd_fast_normalize(simd_float3 __x); +static simd_float4 SIMD_CFUNC simd_fast_normalize(simd_float4 __x); +static simd_float8 SIMD_CFUNC simd_fast_normalize(simd_float8 __x); +static simd_float16 SIMD_CFUNC simd_fast_normalize(simd_float16 __x); +static simd_double2 SIMD_CFUNC simd_fast_normalize(simd_double2 __x); +static simd_double3 SIMD_CFUNC simd_fast_normalize(simd_double3 __x); +static simd_double4 SIMD_CFUNC simd_fast_normalize(simd_double4 __x); +static simd_double8 SIMD_CFUNC simd_fast_normalize(simd_double8 __x); +#define vector_fast_normalize simd_fast_normalize + +static simd_float2 SIMD_CFUNC simd_normalize(simd_float2 __x); +static simd_float3 SIMD_CFUNC simd_normalize(simd_float3 __x); +static simd_float4 SIMD_CFUNC simd_normalize(simd_float4 __x); +static simd_float8 SIMD_CFUNC simd_normalize(simd_float8 __x); +static simd_float16 SIMD_CFUNC simd_normalize(simd_float16 __x); +static simd_double2 SIMD_CFUNC simd_normalize(simd_double2 __x); +static simd_double3 SIMD_CFUNC simd_normalize(simd_double3 __x); +static simd_double4 SIMD_CFUNC simd_normalize(simd_double4 __x); +static simd_double8 SIMD_CFUNC simd_normalize(simd_double8 __x); +#define vector_normalize simd_normalize + +static simd_float3 SIMD_CFUNC simd_cross(simd_float2 __x, simd_float2 __y); +static simd_float3 SIMD_CFUNC simd_cross(simd_float3 __x, simd_float3 __y); +static simd_double3 SIMD_CFUNC simd_cross(simd_double2 __x, simd_double2 __y); +static simd_double3 SIMD_CFUNC simd_cross(simd_double3 __x, simd_double3 __y); +#define vector_cross simd_cross + +static simd_float2 SIMD_CFUNC simd_reflect(simd_float2 __x, simd_float2 __n); +static simd_float3 SIMD_CFUNC simd_reflect(simd_float3 __x, simd_float3 __n); +static simd_float4 SIMD_CFUNC simd_reflect(simd_float4 __x, simd_float4 __n); +static simd_double2 SIMD_CFUNC simd_reflect(simd_double2 __x, simd_double2 __n); +static simd_double3 SIMD_CFUNC simd_reflect(simd_double3 __x, simd_double3 __n); +static simd_double4 SIMD_CFUNC simd_reflect(simd_double4 __x, simd_double4 __n); +#define vector_reflect simd_reflect + +static simd_float2 SIMD_CFUNC simd_refract(simd_float2 __x, simd_float2 __n, float __eta); +static simd_float3 SIMD_CFUNC simd_refract(simd_float3 __x, 
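+/* Illustrative sketch (values hypothetical): for 2d arguments, simd_cross
+ * treats the inputs as lying in the z == 0 plane, so only the z component
+ * of the result can be non-zero.
+ *
+ *   simd_float2 x = {1, 0};
+ *   simd_float2 y = {0, 1};
+ *   simd_float3 z = simd_cross(x, y);    // {0, 0, 1}
+ */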
simd_float3 __n, float __eta);
+static simd_float4 SIMD_CFUNC simd_refract(simd_float4 __x, simd_float4 __n, float __eta);
+static simd_double2 SIMD_CFUNC simd_refract(simd_double2 __x, simd_double2 __n, double __eta);
+static simd_double3 SIMD_CFUNC simd_refract(simd_double3 __x, simd_double3 __n, double __eta);
+static simd_double4 SIMD_CFUNC simd_refract(simd_double4 __x, simd_double4 __n, double __eta);
+#define vector_refract simd_refract
+
+#if SIMD_LIBRARY_VERSION >= 2
+/* These functions require that you are building for OS X 10.12 or later,
+ * iOS 10.0 or later, watchOS 3.0 or later, and tvOS 10.0 or later. On
+ * earlier OS versions, the library functions that implement these
+ * operations are not available. */
+
+/*! @functiongroup vector orientation
+ *
+ * @discussion These functions return a positive value if the origin and
+ * their ordered arguments determine a positively oriented parallelepiped,
+ * zero if it is degenerate, and a negative value if it is negatively
+ * oriented. This is equivalent to saying that the matrix with rows equal
+ * to the vectors has a positive, zero, or negative determinant,
+ * respectively.
+ *
+ * Naive evaluation of the determinant is prone to producing incorrect
+ * results if the vectors are nearly degenerate (e.g. floating-point
+ * rounding might cause the determinant to be zero or negative when
+ * the points are very nearly coplanar but positively oriented). If
+ * the vectors are very large or small, computing the determinant is
+ * also prone to premature overflow, which may cause the result to be
+ * NaN even though the vectors contain normal floating-point numbers.
+ *
+ * These routines take care to avoid those issues and always return a
+ * result with correct sign, even when the problem is very ill-
+ * conditioned. */
+
+/*! @abstract Test the orientation of two 2d vectors.
+ *
+ * @param __x The first vector.
+ * @param __y The second vector.
+ *
+ * @result Positive if (x, y) are positively oriented, zero if they are
+ * colinear, and negative if they are negatively oriented.
+ *
+ * @discussion For two-dimensional vectors, "positively oriented" is
+ * equivalent to the ordering (0, x, y) proceeding counter-clockwise
+ * when viewed down the z axis, or to the cross product of x and y
+ * extended to three-dimensions having positive z-component. */
+static float SIMD_CFUNC simd_orient(simd_float2 __x, simd_float2 __y);
+
+/*! @abstract Test the orientation of two 2d vectors.
+ *
+ * @param __x The first vector.
+ * @param __y The second vector.
+ *
+ * @result Positive if (x, y) are positively oriented, zero if they are
+ * colinear, and negative if they are negatively oriented.
+ *
+ * @discussion For two-dimensional vectors, "positively oriented" is
+ * equivalent to the ordering (0, x, y) proceeding counter-clockwise
+ * when viewed down the z axis, or to the cross product of x and y
+ * extended to three-dimensions having positive z-component. */
+static double SIMD_CFUNC simd_orient(simd_double2 __x, simd_double2 __y);
+
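+/* Illustrative sketch (values hypothetical): the standard basis is
+ * positively oriented, so
+ *
+ *   simd_float2 x = {1, 0};
+ *   simd_float2 y = {0, 1};
+ *   float s = simd_orient(x, y);    // s > 0: counter-clockwise
+ *
+ * and swapping the arguments flips the sign: simd_orient(y, x) < 0. */
+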
+/*! @abstract Test the orientation of three 3d vectors.
+ *
+ * @param __x The first vector.
+ * @param __y The second vector.
+ * @param __z The third vector.
+ *
+ * @result Positive if (x, y, z) are positively oriented, zero if they
+ * are coplanar, and negative if they are negatively oriented.
+ *
+ * @discussion For three-dimensional vectors, "positively oriented" is
+ * equivalent to the ordering (x, y, z) following the "right hand rule",
+ * or to the dot product of z with the cross product of x and y being
+ * positive. */
+static float SIMD_CFUNC simd_orient(simd_float3 __x, simd_float3 __y, simd_float3 __z);
+
+/*! @abstract Test the orientation of three 3d vectors.
+ *
+ * @param __x The first vector.
+ * @param __y The second vector.
+ * @param __z The third vector.
+ *
+ * @result Positive if (x, y, z) are positively oriented, zero if they
+ * are coplanar, and negative if they are negatively oriented.
+ *
+ * @discussion For three-dimensional vectors, "positively oriented" is
+ * equivalent to the ordering (x, y, z) following the "right hand rule",
+ * or to the dot product of z with the cross product of x and y being
+ * positive. */
+static double SIMD_CFUNC simd_orient(simd_double3 __x, simd_double3 __y, simd_double3 __z);
+
+/*! @functiongroup point (affine) orientation
+ *
+ * @discussion These functions return a positive value if their ordered
+ * arguments determine a positively oriented parallelepiped, zero if it
+ * is degenerate, and a negative value if it is negatively oriented.
+ *
+ * simd_orient(a, b, c) is formally equivalent to simd_orient(b-a, c-a),
+ * but it is not affected by rounding error from subtraction of points,
+ * as that implementation would be. Care is taken so that the sign of
+ * the result is always correct, even if the problem is ill-conditioned. */
+
+/*! @abstract Test the orientation of a triangle in 2d.
+ *
+ * @param __a The first point of the triangle.
+ * @param __b The second point of the triangle.
+ * @param __c The third point of the triangle.
+ *
+ * @result Positive if the triangle is positively oriented, zero if it
+ * is degenerate (three points in a line), and negative if it is negatively
+ * oriented.
+ *
+ * @discussion "Positively oriented" is equivalent to the ordering
+ * (a, b, c) proceeding counter-clockwise when viewed down the z axis,
+ * or to the cross product of a-c and b-c extended to three-dimensions
+ * having positive z-component. */
+static float SIMD_CFUNC simd_orient(simd_float2 __a, simd_float2 __b, simd_float2 __c);
+
+/*! @abstract Test the orientation of a triangle in 2d.
+ *
+ * @param __a The first point of the triangle.
+ * @param __b The second point of the triangle.
+ * @param __c The third point of the triangle.
+ *
+ * @result Positive if the triangle is positively oriented, zero if it
+ * is degenerate (three points in a line), and negative if it is negatively
+ * oriented.
+ *
+ * @discussion "Positively oriented" is equivalent to the ordering
+ * (a, b, c) proceeding counter-clockwise when viewed down the z axis,
+ * or to the cross product of a-c and b-c extended to three-dimensions
+ * having positive z-component. */
+static double SIMD_CFUNC simd_orient(simd_double2 __a, simd_double2 __b, simd_double2 __c);
+
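+/* Illustrative sketch (values hypothetical): the triangle a, b, c below
+ * proceeds counter-clockwise, and because these routines are exact the
+ * sign stays correct even when the points sit far from the origin, where
+ * naive (b-a, c-a) subtraction loses precision.
+ *
+ *   simd_double2 a = {1e8, 1e8};
+ *   simd_double2 b = {1e8 + 1, 1e8};
+ *   simd_double2 c = {1e8, 1e8 + 1};
+ *   double s = simd_orient(a, b, c);    // s > 0
+ */
+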
+ * + * @discussion "Positively oriented" is equivalent to the vectors + * (a-d, b-d, c-d) following the "right hand rule", or to the dot product + * of c-d with the the cross product of a-d and b-d being positive. */ +static float SIMD_CFUNC simd_orient(simd_float3 __a, simd_float3 __b, simd_float3 __c, simd_float3 __d); + +/*! @abstract Test the orientation of a tetrahedron in 3d. + * + * @param __a The first point of the tetrahedron. + * @param __b The second point of the tetrahedron. + * @param __c The third point of the tetrahedron. + * @param __d The fourth point of the tetrahedron. + * + * @result Positive if the tetrahedron is positively oriented, zero if it + * is degenerate (four points in a plane), and negative if it is negatively + * oriented. + * + * @discussion "Positively oriented" is equivalent to the vectors + * (a-d, b-d, c-d) following the "right hand rule", or to the dot product + * of c-d with the the cross product of a-d and b-d being positive. */ +static double SIMD_CFUNC simd_orient(simd_double3 __a, simd_double3 __b, simd_double3 __c, simd_double3 __d); + +/*! @functiongroup incircle (points) tests + * + * @discussion These functions determine whether the point x is inside, on, + * or outside the circle or sphere passing through a group of points. If + * x is inside the circle, the result is positive; if x is on the circle, + * the result is zero; if x is outside the circle the result is negative. + * + * These functions are always exact, even if the problem is ill- + * conditioned (meaning that the points are nearly co-linear or + * co-planar). + * + * If the points are negatively-oriented, the the notions of "inside" and + * "outside" are flipped. If the points are degenerate, then the result + * is undefined. */ + +/*! @abstract Test if x lies inside, on, or outside the circle passing + * through a, b, and c. + * + * @param __x The point being tested. + * @param __a The first point determining the circle. + * @param __b The second point determining the circle. + * @param __c The third point determining the circle. + * + * @result Assuming that (a,b,c) are positively-oriented, positive if x is + * inside the circle, zero if x is on the circle, and negative if x is + * outside the circle. The sign of the result is flipped if (a,b,c) are + * negatively-oriented. */ +static float SIMD_CFUNC simd_incircle(simd_float2 __x, simd_float2 __a, simd_float2 __b, simd_float2 __c); + +/*! @abstract Test if x lies inside, on, or outside the circle passing + * through a, b, and c. + * + * @param __x The point being tested. + * @param __a The first point determining the circle. + * @param __b The second point determining the circle. + * @param __c The third point determining the circle. + * + * @result Assuming that (a,b,c) are positively-oriented, positive if x is + * inside the circle, zero if x is on the circle, and negative if x is + * outside the circle. The sign of the result is flipped if (a,b,c) are + * negatively-oriented. */ +static double SIMD_CFUNC simd_incircle(simd_double2 __x, simd_double2 __a, simd_double2 __b, simd_double2 __c); + +/*! @abstract Test if x lies inside, on, or outside the sphere passing + * through a, b, c, and d. + * + * @param __x The point being tested. + * @param __a The first point determining the sphere. + * @param __b The second point determining the sphere. + * @param __c The third point determining the sphere. + * @param __d The fourth point determining the sphere. 
+ * + * @result Assuming that the points are positively-oriented, positive if x + * is inside the sphere, zero if x is on the sphere, and negative if x is + * outside the sphere. The sign of the result is flipped if the points are + * negatively-oriented. */ +static float SIMD_CFUNC simd_insphere(simd_float3 __x, simd_float3 __a, simd_float3 __b, simd_float3 __c, simd_float3 __d); + +/*! @abstract Test if x lies inside, on, or outside the sphere passing + * through a, b, c, and d. + * + * @param __x The point being tested. + * @param __a The first point determining the sphere. + * @param __b The second point determining the sphere. + * @param __c The third point determining the sphere. + * @param __d The fourth point determining the sphere. + * + * @result Assuming that the points are positively-oriented, positive if x + * is inside the sphere, zero if x is on the sphere, and negative if x is + * outside the sphere. The sign of the result is flipped if the points are + * negatively-oriented. */ +static double SIMD_CFUNC simd_insphere(simd_double3 __x, simd_double3 __a, simd_double3 __b, simd_double3 __c, simd_double3 __d); +#endif /* SIMD_LIBRARY_VERSION */ + +#ifdef __cplusplus +} /* extern "C" */ + +namespace simd { + static SIMD_CPPFUNC float dot(const float2 x, const float2 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC float dot(const float3 x, const float3 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC float dot(const float4 x, const float4 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC float dot(const float8 x, const float8 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC float dot(const float16 x, const float16 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC double dot(const double2 x, const double2 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC double dot(const double3 x, const double3 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC double dot(const double4 x, const double4 y) { return ::simd_dot(x, y); } + static SIMD_CPPFUNC double dot(const double8 x, const double8 y) { return ::simd_dot(x, y); } + + static SIMD_CPPFUNC float2 project(const float2 x, const float2 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC float3 project(const float3 x, const float3 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC float4 project(const float4 x, const float4 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC float8 project(const float8 x, const float8 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC float16 project(const float16 x, const float16 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC double2 project(const double2 x, const double2 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC double3 project(const double3 x, const double3 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC double4 project(const double4 x, const double4 y) { return ::simd_project(x, y); } + static SIMD_CPPFUNC double8 project(const double8 x, const double8 y) { return ::simd_project(x, y); } + + static SIMD_CPPFUNC float length_squared(const float2 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC float length_squared(const float3 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC float length_squared(const float4 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC float length_squared(const float8 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC float length_squared(const float16 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC double 
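+/* Illustrative sketch (values hypothetical): a, b, c below lie on the unit
+ * circle, ordered counter-clockwise (positively oriented), so
+ *
+ *   simd_float2 a = {1, 0}, b = {0, 1}, c = {-1, 0};
+ *   simd_float2 x0 = {0, 0}, x1 = {2, 0};
+ *   simd_incircle(x0, a, b, c);    // > 0: the origin is inside
+ *   simd_incircle(x1, a, b, c);    // < 0: (2,0) is outside
+ */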
length_squared(const double2 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC double length_squared(const double3 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC double length_squared(const double4 x) { return ::simd_length_squared(x); } + static SIMD_CPPFUNC double length_squared(const double8 x) { return ::simd_length_squared(x); } + + static SIMD_CPPFUNC float norm_one(const float2 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC float norm_one(const float3 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC float norm_one(const float4 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC float norm_one(const float8 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC float norm_one(const float16 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC double norm_one(const double2 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC double norm_one(const double3 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC double norm_one(const double4 x) { return ::simd_norm_one(x); } + static SIMD_CPPFUNC double norm_one(const double8 x) { return ::simd_norm_one(x); } + + static SIMD_CPPFUNC float norm_inf(const float2 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC float norm_inf(const float3 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC float norm_inf(const float4 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC float norm_inf(const float8 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC float norm_inf(const float16 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC double norm_inf(const double2 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC double norm_inf(const double3 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC double norm_inf(const double4 x) { return ::simd_norm_inf(x); } + static SIMD_CPPFUNC double norm_inf(const double8 x) { return ::simd_norm_inf(x); } + + static SIMD_CPPFUNC float length(const float2 x) { return ::simd_length(x); } + static SIMD_CPPFUNC float length(const float3 x) { return ::simd_length(x); } + static SIMD_CPPFUNC float length(const float4 x) { return ::simd_length(x); } + static SIMD_CPPFUNC float length(const float8 x) { return ::simd_length(x); } + static SIMD_CPPFUNC float length(const float16 x) { return ::simd_length(x); } + static SIMD_CPPFUNC double length(const double2 x) { return ::simd_length(x); } + static SIMD_CPPFUNC double length(const double3 x) { return ::simd_length(x); } + static SIMD_CPPFUNC double length(const double4 x) { return ::simd_length(x); } + static SIMD_CPPFUNC double length(const double8 x) { return ::simd_length(x); } + + static SIMD_CPPFUNC float distance_squared(const float2 x, const float2 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC float distance_squared(const float3 x, const float3 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC float distance_squared(const float4 x, const float4 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC float distance_squared(const float8 x, const float8 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC float distance_squared(const float16 x, const float16 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC double distance_squared(const double2 x, const double2 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC double distance_squared(const double3 x, const double3 y) { return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC double distance_squared(const double4 x, const double4 y) { 
return ::simd_distance_squared(x, y); } + static SIMD_CPPFUNC double distance_squared(const double8 x, const double8 y) { return ::simd_distance_squared(x, y); } + + static SIMD_CPPFUNC float distance(const float2 x, const float2 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC float distance(const float3 x, const float3 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC float distance(const float4 x, const float4 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC float distance(const float8 x, const float8 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC float distance(const float16 x, const float16 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC double distance(const double2 x, const double2 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC double distance(const double3 x, const double3 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC double distance(const double4 x, const double4 y) { return ::simd_distance(x, y); } + static SIMD_CPPFUNC double distance(const double8 x, const double8 y) { return ::simd_distance(x, y); } + + static SIMD_CPPFUNC float2 normalize(const float2 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC float3 normalize(const float3 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC float4 normalize(const float4 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC float8 normalize(const float8 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC float16 normalize(const float16 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC double2 normalize(const double2 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC double3 normalize(const double3 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC double4 normalize(const double4 x) { return ::simd_normalize(x); } + static SIMD_CPPFUNC double8 normalize(const double8 x) { return ::simd_normalize(x); } + + static SIMD_CPPFUNC float3 cross(const float2 x, const float2 y) { return ::simd_cross(x,y); } + static SIMD_CPPFUNC float3 cross(const float3 x, const float3 y) { return ::simd_cross(x,y); } + static SIMD_CPPFUNC double3 cross(const double2 x, const double2 y) { return ::simd_cross(x,y); } + static SIMD_CPPFUNC double3 cross(const double3 x, const double3 y) { return ::simd_cross(x,y); } + + static SIMD_CPPFUNC float2 reflect(const float2 x, const float2 n) { return ::simd_reflect(x,n); } + static SIMD_CPPFUNC float3 reflect(const float3 x, const float3 n) { return ::simd_reflect(x,n); } + static SIMD_CPPFUNC float4 reflect(const float4 x, const float4 n) { return ::simd_reflect(x,n); } + static SIMD_CPPFUNC double2 reflect(const double2 x, const double2 n) { return ::simd_reflect(x,n); } + static SIMD_CPPFUNC double3 reflect(const double3 x, const double3 n) { return ::simd_reflect(x,n); } + static SIMD_CPPFUNC double4 reflect(const double4 x, const double4 n) { return ::simd_reflect(x,n); } + + static SIMD_CPPFUNC float2 refract(const float2 x, const float2 n, const float eta) { return ::simd_refract(x,n,eta); } + static SIMD_CPPFUNC float3 refract(const float3 x, const float3 n, const float eta) { return ::simd_refract(x,n,eta); } + static SIMD_CPPFUNC float4 refract(const float4 x, const float4 n, const float eta) { return ::simd_refract(x,n,eta); } + static SIMD_CPPFUNC double2 refract(const double2 x, const double2 n, const float eta) { return ::simd_refract(x,n,eta); } + static SIMD_CPPFUNC double3 refract(const double3 x, const double3 n, const float eta) { return ::simd_refract(x,n,eta); } + static SIMD_CPPFUNC 
double4 refract(const double4 x, const double4 n, const float eta) { return ::simd_refract(x,n,eta); } + +#if SIMD_LIBRARY_VERSION >= 2 + static SIMD_CPPFUNC float orient(const float2 x, const float2 y) { return ::simd_orient(x,y); } + static SIMD_CPPFUNC float orient(const float2 a, const float2 b, const float2 c) { return ::simd_orient(a,b,c); } + static SIMD_CPPFUNC float orient(const float3 x, const float3 y, const float3 z) { return ::simd_orient(x,y,z); } + static SIMD_CPPFUNC float orient(const float3 a, const float3 b, const float3 c, const float3 d) { return ::simd_orient(a,b,c,d); } + static SIMD_CPPFUNC double orient(const double2 x, const double2 y) { return ::simd_orient(x,y); } + static SIMD_CPPFUNC double orient(const double2 a, const double2 b, const double2 c) { return ::simd_orient(a,b,c); } + static SIMD_CPPFUNC double orient(const double3 x, const double3 y, const double3 z) { return ::simd_orient(x,y,z); } + static SIMD_CPPFUNC double orient(const double3 a, const double3 b, const double3 c, const double3 d) { return ::simd_orient(a,b,c,d); } +#endif + + /* precise and fast sub-namespaces */ + namespace precise { + static SIMD_CPPFUNC float2 project(const float2 x, const float2 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC float3 project(const float3 x, const float3 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC float4 project(const float4 x, const float4 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC float8 project(const float8 x, const float8 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC float16 project(const float16 x, const float16 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC double2 project(const double2 x, const double2 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC double3 project(const double3 x, const double3 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC double4 project(const double4 x, const double4 y) { return ::simd_precise_project(x, y); } + static SIMD_CPPFUNC double8 project(const double8 x, const double8 y) { return ::simd_precise_project(x, y); } + + static SIMD_CPPFUNC float length(const float2 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC float length(const float3 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC float length(const float4 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC float length(const float8 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC float length(const float16 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC double length(const double2 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC double length(const double3 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC double length(const double4 x) { return ::simd_precise_length(x); } + static SIMD_CPPFUNC double length(const double8 x) { return ::simd_precise_length(x); } + + static SIMD_CPPFUNC float distance(const float2 x, const float2 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC float distance(const float3 x, const float3 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC float distance(const float4 x, const float4 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC float distance(const float8 x, const float8 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC float distance(const float16 x, const float16 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC double 
distance(const double2 x, const double2 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC double distance(const double3 x, const double3 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC double distance(const double4 x, const double4 y) { return ::simd_precise_distance(x, y); } + static SIMD_CPPFUNC double distance(const double8 x, const double8 y) { return ::simd_precise_distance(x, y); } + + static SIMD_CPPFUNC float2 normalize(const float2 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC float3 normalize(const float3 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC float4 normalize(const float4 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC float8 normalize(const float8 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC float16 normalize(const float16 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC double2 normalize(const double2 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC double3 normalize(const double3 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC double4 normalize(const double4 x) { return ::simd_precise_normalize(x); } + static SIMD_CPPFUNC double8 normalize(const double8 x) { return ::simd_precise_normalize(x); } + } + + namespace fast { + static SIMD_CPPFUNC float2 project(const float2 x, const float2 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC float3 project(const float3 x, const float3 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC float4 project(const float4 x, const float4 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC float8 project(const float8 x, const float8 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC float16 project(const float16 x, const float16 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC double2 project(const double2 x, const double2 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC double3 project(const double3 x, const double3 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC double4 project(const double4 x, const double4 y) { return ::simd_fast_project(x, y); } + static SIMD_CPPFUNC double8 project(const double8 x, const double8 y) { return ::simd_fast_project(x, y); } + + static SIMD_CPPFUNC float length(const float2 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC float length(const float3 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC float length(const float4 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC float length(const float8 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC float length(const float16 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC double length(const double2 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC double length(const double3 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC double length(const double4 x) { return ::simd_fast_length(x); } + static SIMD_CPPFUNC double length(const double8 x) { return ::simd_fast_length(x); } + + static SIMD_CPPFUNC float distance(const float2 x, const float2 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC float distance(const float3 x, const float3 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC float distance(const float4 x, const float4 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC float distance(const float8 x, const float8 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC float distance(const 
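+/* Illustrative sketch (values hypothetical): in C++ these sub-namespaces
+ * pin the variant down regardless of -ffast-math, mirroring the C-level
+ * simd_precise_* and simd_fast_* names.
+ *
+ *   simd::float2 v = {3.0f, 4.0f};
+ *   float lp = simd::precise::length(v);    // always the accurate path
+ *   float lf = simd::fast::length(v);       // always the fast path
+ *   float l  = simd::length(v);             // follows the compile flags
+ */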
float16 x, const float16 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC double distance(const double2 x, const double2 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC double distance(const double3 x, const double3 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC double distance(const double4 x, const double4 y) { return ::simd_fast_distance(x, y); } + static SIMD_CPPFUNC double distance(const double8 x, const double8 y) { return ::simd_fast_distance(x, y); } + + static SIMD_CPPFUNC float2 normalize(const float2 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC float3 normalize(const float3 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC float4 normalize(const float4 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC float8 normalize(const float8 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC float16 normalize(const float16 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC double2 normalize(const double2 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC double3 normalize(const double3 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC double4 normalize(const double4 x) { return ::simd_fast_normalize(x); } + static SIMD_CPPFUNC double8 normalize(const double8 x) { return ::simd_fast_normalize(x); } + } +} + +extern "C" { +#endif /* __cplusplus */ + +#pragma mark - Implementation + +static float SIMD_CFUNC simd_dot(simd_float2 __x, simd_float2 __y) { return simd_reduce_add(__x*__y); } +static float SIMD_CFUNC simd_dot(simd_float3 __x, simd_float3 __y) { return simd_reduce_add(__x*__y); } +static float SIMD_CFUNC simd_dot(simd_float4 __x, simd_float4 __y) { return simd_reduce_add(__x*__y); } +static float SIMD_CFUNC simd_dot(simd_float8 __x, simd_float8 __y) { return simd_reduce_add(__x*__y); } +static float SIMD_CFUNC simd_dot(simd_float16 __x, simd_float16 __y) { return simd_reduce_add(__x*__y); } +static double SIMD_CFUNC simd_dot(simd_double2 __x, simd_double2 __y) { return simd_reduce_add(__x*__y); } +static double SIMD_CFUNC simd_dot(simd_double3 __x, simd_double3 __y) { return simd_reduce_add(__x*__y); } +static double SIMD_CFUNC simd_dot(simd_double4 __x, simd_double4 __y) { return simd_reduce_add(__x*__y); } +static double SIMD_CFUNC simd_dot(simd_double8 __x, simd_double8 __y) { return simd_reduce_add(__x*__y); } + +static simd_float2 SIMD_CFUNC simd_precise_project(simd_float2 __x, simd_float2 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_float3 SIMD_CFUNC simd_precise_project(simd_float3 __x, simd_float3 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_float4 SIMD_CFUNC simd_precise_project(simd_float4 __x, simd_float4 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_float8 SIMD_CFUNC simd_precise_project(simd_float8 __x, simd_float8 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_float16 SIMD_CFUNC simd_precise_project(simd_float16 __x, simd_float16 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_double2 SIMD_CFUNC simd_precise_project(simd_double2 __x, simd_double2 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_double3 SIMD_CFUNC simd_precise_project(simd_double3 __x, simd_double3 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_double4 SIMD_CFUNC simd_precise_project(simd_double4 __x, simd_double4 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } +static simd_double8 SIMD_CFUNC 
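+/* Worked example (values hypothetical) of the reduction used above:
+ * simd_dot multiplies elementwise, then sums the lanes with
+ * simd_reduce_add.
+ *
+ *   simd_float3 x = {1, 2, 3};
+ *   simd_float3 y = {4, -5, 6};
+ *   float d = simd_dot(x, y);    // 1*4 + 2*(-5) + 3*6 = 12
+ */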
simd_precise_project(simd_double8 __x, simd_double8 __y) { return simd_dot(__x,__y)/simd_dot(__y,__y)*__y; } + +static simd_float2 SIMD_CFUNC simd_fast_project(simd_float2 __x, simd_float2 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_float3 SIMD_CFUNC simd_fast_project(simd_float3 __x, simd_float3 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_float4 SIMD_CFUNC simd_fast_project(simd_float4 __x, simd_float4 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_float8 SIMD_CFUNC simd_fast_project(simd_float8 __x, simd_float8 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_float16 SIMD_CFUNC simd_fast_project(simd_float16 __x, simd_float16 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_double2 SIMD_CFUNC simd_fast_project(simd_double2 __x, simd_double2 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_double3 SIMD_CFUNC simd_fast_project(simd_double3 __x, simd_double3 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_double4 SIMD_CFUNC simd_fast_project(simd_double4 __x, simd_double4 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } +static simd_double8 SIMD_CFUNC simd_fast_project(simd_double8 __x, simd_double8 __y) { return __y*simd_dot(__x,__y)*simd_fast_recip(simd_dot(__y,__y)); } + +#if defined __FAST_MATH__ +static simd_float2 SIMD_CFUNC simd_project(simd_float2 __x, simd_float2 __y) { return simd_fast_project(__x,__y); } +static simd_float3 SIMD_CFUNC simd_project(simd_float3 __x, simd_float3 __y) { return simd_fast_project(__x,__y); } +static simd_float4 SIMD_CFUNC simd_project(simd_float4 __x, simd_float4 __y) { return simd_fast_project(__x,__y); } +static simd_float8 SIMD_CFUNC simd_project(simd_float8 __x, simd_float8 __y) { return simd_fast_project(__x,__y); } +static simd_float16 SIMD_CFUNC simd_project(simd_float16 __x, simd_float16 __y) { return simd_fast_project(__x,__y); } +static simd_double2 SIMD_CFUNC simd_project(simd_double2 __x, simd_double2 __y) { return simd_fast_project(__x,__y); } +static simd_double3 SIMD_CFUNC simd_project(simd_double3 __x, simd_double3 __y) { return simd_fast_project(__x,__y); } +static simd_double4 SIMD_CFUNC simd_project(simd_double4 __x, simd_double4 __y) { return simd_fast_project(__x,__y); } +static simd_double8 SIMD_CFUNC simd_project(simd_double8 __x, simd_double8 __y) { return simd_fast_project(__x,__y); } +#else +static simd_float2 SIMD_CFUNC simd_project(simd_float2 __x, simd_float2 __y) { return simd_precise_project(__x,__y); } +static simd_float3 SIMD_CFUNC simd_project(simd_float3 __x, simd_float3 __y) { return simd_precise_project(__x,__y); } +static simd_float4 SIMD_CFUNC simd_project(simd_float4 __x, simd_float4 __y) { return simd_precise_project(__x,__y); } +static simd_float8 SIMD_CFUNC simd_project(simd_float8 __x, simd_float8 __y) { return simd_precise_project(__x,__y); } +static simd_float16 SIMD_CFUNC simd_project(simd_float16 __x, simd_float16 __y) { return simd_precise_project(__x,__y); } +static simd_double2 SIMD_CFUNC simd_project(simd_double2 __x, simd_double2 __y) { return simd_precise_project(__x,__y); } +static simd_double3 SIMD_CFUNC simd_project(simd_double3 __x, simd_double3 __y) { return simd_precise_project(__x,__y); } +static simd_double4 SIMD_CFUNC simd_project(simd_double4 __x, simd_double4 __y) { return 
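+/* Worked example (values hypothetical) of the projection formula
+ * dot(x,y)/dot(y,y) * y implemented above:
+ *
+ *   simd_float2 x = {3, 4};
+ *   simd_float2 y = {1, 1};
+ *   simd_float2 p = simd_project(x, y);    // (7/2)*{1,1} = {3.5, 3.5}
+ */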
simd_precise_project(__x,__y); } +static simd_double8 SIMD_CFUNC simd_project(simd_double8 __x, simd_double8 __y) { return simd_precise_project(__x,__y); } +#endif + +static float SIMD_CFUNC simd_precise_length(simd_float2 __x) { return sqrtf(simd_length_squared(__x)); } +static float SIMD_CFUNC simd_precise_length(simd_float3 __x) { return sqrtf(simd_length_squared(__x)); } +static float SIMD_CFUNC simd_precise_length(simd_float4 __x) { return sqrtf(simd_length_squared(__x)); } +static float SIMD_CFUNC simd_precise_length(simd_float8 __x) { return sqrtf(simd_length_squared(__x)); } +static float SIMD_CFUNC simd_precise_length(simd_float16 __x) { return sqrtf(simd_length_squared(__x)); } +static double SIMD_CFUNC simd_precise_length(simd_double2 __x) { return sqrt(simd_length_squared(__x)); } +static double SIMD_CFUNC simd_precise_length(simd_double3 __x) { return sqrt(simd_length_squared(__x)); } +static double SIMD_CFUNC simd_precise_length(simd_double4 __x) { return sqrt(simd_length_squared(__x)); } +static double SIMD_CFUNC simd_precise_length(simd_double8 __x) { return sqrt(simd_length_squared(__x)); } + +static float SIMD_CFUNC simd_fast_length(simd_float2 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_fast_length(simd_float3 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_fast_length(simd_float4 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_fast_length(simd_float8 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_fast_length(simd_float16 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_fast_length(simd_double2 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_fast_length(simd_double3 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_fast_length(simd_double4 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_fast_length(simd_double8 __x) { return simd_precise_length(__x); } + +#if defined __FAST_MATH__ +static float SIMD_CFUNC simd_length(simd_float2 __x) { return simd_fast_length(__x); } +static float SIMD_CFUNC simd_length(simd_float3 __x) { return simd_fast_length(__x); } +static float SIMD_CFUNC simd_length(simd_float4 __x) { return simd_fast_length(__x); } +static float SIMD_CFUNC simd_length(simd_float8 __x) { return simd_fast_length(__x); } +static float SIMD_CFUNC simd_length(simd_float16 __x) { return simd_fast_length(__x); } +static double SIMD_CFUNC simd_length(simd_double2 __x) { return simd_fast_length(__x); } +static double SIMD_CFUNC simd_length(simd_double3 __x) { return simd_fast_length(__x); } +static double SIMD_CFUNC simd_length(simd_double4 __x) { return simd_fast_length(__x); } +static double SIMD_CFUNC simd_length(simd_double8 __x) { return simd_fast_length(__x); } +#else +static float SIMD_CFUNC simd_length(simd_float2 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_length(simd_float3 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_length(simd_float4 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_length(simd_float8 __x) { return simd_precise_length(__x); } +static float SIMD_CFUNC simd_length(simd_float16 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_length(simd_double2 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_length(simd_double3 __x) { return simd_precise_length(__x); } +static double SIMD_CFUNC simd_length(simd_double4 __x) { return 
simd_precise_length(__x); } +static double SIMD_CFUNC simd_length(simd_double8 __x) { return simd_precise_length(__x); } +#endif + +static float SIMD_CFUNC simd_length_squared(simd_float2 __x) { return simd_dot(__x,__x); } +static float SIMD_CFUNC simd_length_squared(simd_float3 __x) { return simd_dot(__x,__x); } +static float SIMD_CFUNC simd_length_squared(simd_float4 __x) { return simd_dot(__x,__x); } +static float SIMD_CFUNC simd_length_squared(simd_float8 __x) { return simd_dot(__x,__x); } +static float SIMD_CFUNC simd_length_squared(simd_float16 __x) { return simd_dot(__x,__x); } +static double SIMD_CFUNC simd_length_squared(simd_double2 __x) { return simd_dot(__x,__x); } +static double SIMD_CFUNC simd_length_squared(simd_double3 __x) { return simd_dot(__x,__x); } +static double SIMD_CFUNC simd_length_squared(simd_double4 __x) { return simd_dot(__x,__x); } +static double SIMD_CFUNC simd_length_squared(simd_double8 __x) { return simd_dot(__x,__x); } + +static float SIMD_CFUNC simd_norm_one(simd_float2 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_one(simd_float3 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_one(simd_float4 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_one(simd_float8 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_one(simd_float16 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_one(simd_double2 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_one(simd_double3 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_one(simd_double4 __x) { return simd_reduce_add(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_one(simd_double8 __x) { return simd_reduce_add(__tg_fabs(__x)); } + +static float SIMD_CFUNC simd_norm_inf(simd_float2 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_inf(simd_float3 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_inf(simd_float4 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_inf(simd_float8 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static float SIMD_CFUNC simd_norm_inf(simd_float16 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_inf(simd_double2 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_inf(simd_double3 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_inf(simd_double4 __x) { return simd_reduce_max(__tg_fabs(__x)); } +static double SIMD_CFUNC simd_norm_inf(simd_double8 __x) { return simd_reduce_max(__tg_fabs(__x)); } + +static float SIMD_CFUNC simd_precise_distance(simd_float2 __x, simd_float2 __y) { return simd_precise_length(__x - __y); } +static float SIMD_CFUNC simd_precise_distance(simd_float3 __x, simd_float3 __y) { return simd_precise_length(__x - __y); } +static float SIMD_CFUNC simd_precise_distance(simd_float4 __x, simd_float4 __y) { return simd_precise_length(__x - __y); } +static float SIMD_CFUNC simd_precise_distance(simd_float8 __x, simd_float8 __y) { return simd_precise_length(__x - __y); } +static float SIMD_CFUNC simd_precise_distance(simd_float16 __x, simd_float16 __y) { return simd_precise_length(__x - __y); } +static double SIMD_CFUNC simd_precise_distance(simd_double2 __x, simd_double2 __y) { return simd_precise_length(__x - __y); } +static 
double SIMD_CFUNC simd_precise_distance(simd_double3 __x, simd_double3 __y) { return simd_precise_length(__x - __y); } +static double SIMD_CFUNC simd_precise_distance(simd_double4 __x, simd_double4 __y) { return simd_precise_length(__x - __y); } +static double SIMD_CFUNC simd_precise_distance(simd_double8 __x, simd_double8 __y) { return simd_precise_length(__x - __y); } + +static float SIMD_CFUNC simd_fast_distance(simd_float2 __x, simd_float2 __y) { return simd_fast_length(__x - __y); } +static float SIMD_CFUNC simd_fast_distance(simd_float3 __x, simd_float3 __y) { return simd_fast_length(__x - __y); } +static float SIMD_CFUNC simd_fast_distance(simd_float4 __x, simd_float4 __y) { return simd_fast_length(__x - __y); } +static float SIMD_CFUNC simd_fast_distance(simd_float8 __x, simd_float8 __y) { return simd_fast_length(__x - __y); } +static float SIMD_CFUNC simd_fast_distance(simd_float16 __x, simd_float16 __y) { return simd_fast_length(__x - __y); } +static double SIMD_CFUNC simd_fast_distance(simd_double2 __x, simd_double2 __y) { return simd_fast_length(__x - __y); } +static double SIMD_CFUNC simd_fast_distance(simd_double3 __x, simd_double3 __y) { return simd_fast_length(__x - __y); } +static double SIMD_CFUNC simd_fast_distance(simd_double4 __x, simd_double4 __y) { return simd_fast_length(__x - __y); } +static double SIMD_CFUNC simd_fast_distance(simd_double8 __x, simd_double8 __y) { return simd_fast_length(__x - __y); } + +#if defined __FAST_MATH__ +static float SIMD_CFUNC simd_distance(simd_float2 __x, simd_float2 __y) { return simd_fast_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float3 __x, simd_float3 __y) { return simd_fast_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float4 __x, simd_float4 __y) { return simd_fast_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float8 __x, simd_float8 __y) { return simd_fast_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float16 __x, simd_float16 __y) { return simd_fast_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double2 __x, simd_double2 __y) { return simd_fast_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double3 __x, simd_double3 __y) { return simd_fast_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double4 __x, simd_double4 __y) { return simd_fast_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double8 __x, simd_double8 __y) { return simd_fast_distance(__x,__y); } +#else +static float SIMD_CFUNC simd_distance(simd_float2 __x, simd_float2 __y) { return simd_precise_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float3 __x, simd_float3 __y) { return simd_precise_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float4 __x, simd_float4 __y) { return simd_precise_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float8 __x, simd_float8 __y) { return simd_precise_distance(__x,__y); } +static float SIMD_CFUNC simd_distance(simd_float16 __x, simd_float16 __y) { return simd_precise_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double2 __x, simd_double2 __y) { return simd_precise_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double3 __x, simd_double3 __y) { return simd_precise_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double4 __x, simd_double4 __y) { return simd_precise_distance(__x,__y); } +static double SIMD_CFUNC simd_distance(simd_double8 __x, simd_double8 __y) { return 
simd_precise_distance(__x,__y); } +#endif + +static float SIMD_CFUNC simd_distance_squared(simd_float2 __x, simd_float2 __y) { return simd_length_squared(__x - __y); } +static float SIMD_CFUNC simd_distance_squared(simd_float3 __x, simd_float3 __y) { return simd_length_squared(__x - __y); } +static float SIMD_CFUNC simd_distance_squared(simd_float4 __x, simd_float4 __y) { return simd_length_squared(__x - __y); } +static float SIMD_CFUNC simd_distance_squared(simd_float8 __x, simd_float8 __y) { return simd_length_squared(__x - __y); } +static float SIMD_CFUNC simd_distance_squared(simd_float16 __x, simd_float16 __y) { return simd_length_squared(__x - __y); } +static double SIMD_CFUNC simd_distance_squared(simd_double2 __x, simd_double2 __y) { return simd_length_squared(__x - __y); } +static double SIMD_CFUNC simd_distance_squared(simd_double3 __x, simd_double3 __y) { return simd_length_squared(__x - __y); } +static double SIMD_CFUNC simd_distance_squared(simd_double4 __x, simd_double4 __y) { return simd_length_squared(__x - __y); } +static double SIMD_CFUNC simd_distance_squared(simd_double8 __x, simd_double8 __y) { return simd_length_squared(__x - __y); } + +static simd_float2 SIMD_CFUNC simd_precise_normalize(simd_float2 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_float3 SIMD_CFUNC simd_precise_normalize(simd_float3 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_float4 SIMD_CFUNC simd_precise_normalize(simd_float4 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_float8 SIMD_CFUNC simd_precise_normalize(simd_float8 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_float16 SIMD_CFUNC simd_precise_normalize(simd_float16 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_double2 SIMD_CFUNC simd_precise_normalize(simd_double2 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_double3 SIMD_CFUNC simd_precise_normalize(simd_double3 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_double4 SIMD_CFUNC simd_precise_normalize(simd_double4 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } +static simd_double8 SIMD_CFUNC simd_precise_normalize(simd_double8 __x) { return __x * simd_precise_rsqrt(simd_length_squared(__x)); } + +static simd_float2 SIMD_CFUNC simd_fast_normalize(simd_float2 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_float3 SIMD_CFUNC simd_fast_normalize(simd_float3 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_float4 SIMD_CFUNC simd_fast_normalize(simd_float4 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_float8 SIMD_CFUNC simd_fast_normalize(simd_float8 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_float16 SIMD_CFUNC simd_fast_normalize(simd_float16 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_double2 SIMD_CFUNC simd_fast_normalize(simd_double2 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_double3 SIMD_CFUNC simd_fast_normalize(simd_double3 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_double4 SIMD_CFUNC simd_fast_normalize(simd_double4 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); } +static simd_double8 SIMD_CFUNC simd_fast_normalize(simd_double8 __x) { return __x * simd_fast_rsqrt(simd_length_squared(__x)); 
} + +#if defined __FAST_MATH__ +static simd_float2 SIMD_CFUNC simd_normalize(simd_float2 __x) { return simd_fast_normalize(__x); } +static simd_float3 SIMD_CFUNC simd_normalize(simd_float3 __x) { return simd_fast_normalize(__x); } +static simd_float4 SIMD_CFUNC simd_normalize(simd_float4 __x) { return simd_fast_normalize(__x); } +static simd_float8 SIMD_CFUNC simd_normalize(simd_float8 __x) { return simd_fast_normalize(__x); } +static simd_float16 SIMD_CFUNC simd_normalize(simd_float16 __x) { return simd_fast_normalize(__x); } +static simd_double2 SIMD_CFUNC simd_normalize(simd_double2 __x) { return simd_fast_normalize(__x); } +static simd_double3 SIMD_CFUNC simd_normalize(simd_double3 __x) { return simd_fast_normalize(__x); } +static simd_double4 SIMD_CFUNC simd_normalize(simd_double4 __x) { return simd_fast_normalize(__x); } +static simd_double8 SIMD_CFUNC simd_normalize(simd_double8 __x) { return simd_fast_normalize(__x); } +#else +static simd_float2 SIMD_CFUNC simd_normalize(simd_float2 __x) { return simd_precise_normalize(__x); } +static simd_float3 SIMD_CFUNC simd_normalize(simd_float3 __x) { return simd_precise_normalize(__x); } +static simd_float4 SIMD_CFUNC simd_normalize(simd_float4 __x) { return simd_precise_normalize(__x); } +static simd_float8 SIMD_CFUNC simd_normalize(simd_float8 __x) { return simd_precise_normalize(__x); } +static simd_float16 SIMD_CFUNC simd_normalize(simd_float16 __x) { return simd_precise_normalize(__x); } +static simd_double2 SIMD_CFUNC simd_normalize(simd_double2 __x) { return simd_precise_normalize(__x); } +static simd_double3 SIMD_CFUNC simd_normalize(simd_double3 __x) { return simd_precise_normalize(__x); } +static simd_double4 SIMD_CFUNC simd_normalize(simd_double4 __x) { return simd_precise_normalize(__x); } +static simd_double8 SIMD_CFUNC simd_normalize(simd_double8 __x) { return simd_precise_normalize(__x); } +#endif + +static simd_float3 SIMD_CFUNC simd_cross(simd_float2 __x, simd_float2 __y) { return (simd_float3){ 0, 0, __x.x*__y.y - __x.y*__y.x }; } +static simd_float3 SIMD_CFUNC simd_cross(simd_float3 __x, simd_float3 __y) { return (__x.zxy*__y - __x*__y.zxy).zxy; } +static simd_double3 SIMD_CFUNC simd_cross(simd_double2 __x, simd_double2 __y) { return (simd_double3){ 0, 0, __x.x*__y.y - __x.y*__y.x }; } +static simd_double3 SIMD_CFUNC simd_cross(simd_double3 __x, simd_double3 __y) { return (__x.zxy*__y - __x*__y.zxy).zxy; } + +static simd_float2 SIMD_CFUNC simd_reflect(simd_float2 __x, simd_float2 __n) { return __x - 2*simd_dot(__x,__n)*__n; } +static simd_float3 SIMD_CFUNC simd_reflect(simd_float3 __x, simd_float3 __n) { return __x - 2*simd_dot(__x,__n)*__n; } +static simd_float4 SIMD_CFUNC simd_reflect(simd_float4 __x, simd_float4 __n) { return __x - 2*simd_dot(__x,__n)*__n; } +static simd_double2 SIMD_CFUNC simd_reflect(simd_double2 __x, simd_double2 __n) { return __x - 2*simd_dot(__x,__n)*__n; } +static simd_double3 SIMD_CFUNC simd_reflect(simd_double3 __x, simd_double3 __n) { return __x - 2*simd_dot(__x,__n)*__n; } +static simd_double4 SIMD_CFUNC simd_reflect(simd_double4 __x, simd_double4 __n) { return __x - 2*simd_dot(__x,__n)*__n; } + +static simd_float2 SIMD_CFUNC simd_refract(simd_float2 __x, simd_float2 __n, float __eta) { + const float __k = 1.0f - __eta*__eta*(1.0f - simd_dot(__x,__n)*simd_dot(__x,__n)); + return (__k >= 0.0f) ? 
__eta*__x - (__eta*simd_dot(__x,__n) + sqrt(__k))*__n : (simd_float2)0.0f; +} +static simd_float3 SIMD_CFUNC simd_refract(simd_float3 __x, simd_float3 __n, float __eta) { + const float __k = 1.0f - __eta*__eta*(1.0f - simd_dot(__x,__n)*simd_dot(__x,__n)); + return (__k >= 0.0f) ? __eta*__x - (__eta*simd_dot(__x,__n) + sqrt(__k))*__n : (simd_float3)0.0f; +} +static simd_float4 SIMD_CFUNC simd_refract(simd_float4 __x, simd_float4 __n, float __eta) { + const float __k = 1.0f - __eta*__eta*(1.0f - simd_dot(__x,__n)*simd_dot(__x,__n)); + return (__k >= 0.0f) ? __eta*__x - (__eta*simd_dot(__x,__n) + sqrt(__k))*__n : (simd_float4)0.0f; +} +static simd_double2 SIMD_CFUNC simd_refract(simd_double2 __x, simd_double2 __n, double __eta) { + const double __k = 1.0 - __eta*__eta*(1.0 - simd_dot(__x,__n)*simd_dot(__x,__n)); + return (__k >= 0.0) ? __eta*__x - (__eta*simd_dot(__x,__n) + sqrt(__k))*__n : (simd_double2)0.0; +} +static simd_double3 SIMD_CFUNC simd_refract(simd_double3 __x, simd_double3 __n, double __eta) { + const double __k = 1.0 - __eta*__eta*(1.0 - simd_dot(__x,__n)*simd_dot(__x,__n)); + return (__k >= 0.0) ? __eta*__x - (__eta*simd_dot(__x,__n) + sqrt(__k))*__n : (simd_double3)0.0; +} +static simd_double4 SIMD_CFUNC simd_refract(simd_double4 __x, simd_double4 __n, double __eta) { + const double __k = 1.0 - __eta*__eta*(1.0 - simd_dot(__x,__n)*simd_dot(__x,__n)); + return (__k >= 0.0) ? __eta*__x - (__eta*simd_dot(__x,__n) + sqrt(__k))*__n : (simd_double4)0.0; +} + +#if SIMD_LIBRARY_VERSION >= 2 +static float SIMD_CFUNC simd_orient(simd_float2 __x, simd_float2 __y) { + return _simd_orient_vf2(__x, __y); +} +static double SIMD_CFUNC simd_orient(simd_double2 __x, simd_double2 __y) { + return _simd_orient_vd2(__x, __y); +} +static float SIMD_CFUNC simd_orient(simd_float3 __x, simd_float3 __y, simd_float3 __z) { + return _simd_orient_vf3(__x, __y, __z); +} +static double SIMD_CFUNC simd_orient(simd_double3 __x, simd_double3 __y, simd_double3 __z) { + simd_double3 __args[3] = { __x, __y, __z }; + return _simd_orient_vd3((const double *)__args); +} + +static float SIMD_CFUNC simd_orient(simd_float2 __a, simd_float2 __b, simd_float2 __c) { + return _simd_orient_pf2(__a, __b, __c); +} +static double SIMD_CFUNC simd_orient(simd_double2 __a, simd_double2 __b, simd_double2 __c) { + return _simd_orient_pd2(__a, __b, __c); +} +static float SIMD_CFUNC simd_orient(simd_float3 __a, simd_float3 __b, simd_float3 __c, simd_float3 __d) { + return _simd_orient_pf3(__a, __b, __c, __d); +} +static double SIMD_CFUNC simd_orient(simd_double3 __a, simd_double3 __b, simd_double3 __c, simd_double3 __d) { + simd_double3 __args[4] = { __a, __b, __c, __d }; + return _simd_orient_pd3((const double *)__args); +} + +static float SIMD_CFUNC simd_incircle(simd_float2 __x, simd_float2 __a, simd_float2 __b, simd_float2 __c) { + return _simd_incircle_pf2(__x, __a, __b, __c); +} +static double SIMD_CFUNC simd_incircle(simd_double2 __x, simd_double2 __a, simd_double2 __b, simd_double2 __c) { + return _simd_incircle_pd2(__x, __a, __b, __c); +} +static float SIMD_CFUNC simd_insphere(simd_float3 __x, simd_float3 __a, simd_float3 __b, simd_float3 __c, simd_float3 __d) { + return _simd_insphere_pf3(__x, __a, __b, __c, __d); +} +static double SIMD_CFUNC simd_insphere(simd_double3 __x, simd_double3 __a, simd_double3 __b, simd_double3 __c, simd_double3 __d) { + simd_double3 __args[5] = { __x, __a, __b, __c, __d }; + return _simd_insphere_pd3((const double *)__args); +} +#endif /* SIMD_LIBRARY_VERSION */ + +#ifdef __cplusplus +} +#endif 
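+
+/* Illustrative usage sketch -- not part of the original Apple header. The
+ * unprefixed names above (simd_length, simd_distance, simd_normalize,
+ * simd_project, ...) dispatch to the fast variants when the translation
+ * unit is built with __FAST_MATH__ and to the precise variants otherwise.
+ * Assuming a toolchain where <simd/simd.h> resolves (here, through this
+ * vfsoverlay), a minimal caller looks like:
+ *
+ *   #include <simd/simd.h>
+ *   #include <stdio.h>
+ *
+ *   int main(void) {
+ *     simd_float3 v = { 3.0f, 4.0f, 0.0f };
+ *     simd_float3 n = { 0.0f, 1.0f, 0.0f };  // unit normal
+ *     printf("%f\n", simd_length(v));        // 5.000000
+ *     simd_float3 p = simd_project(v, n);    // dot(v,n)/dot(n,n)*n = (0,4,0)
+ *     simd_float3 r = simd_reflect(v, n);    // v - 2*dot(v,n)*n = (3,-4,0)
+ *     printf("(%f, %f, %f)\n", r.x, r.y, r.z);
+ *     (void)p;
+ *     return 0;
+ *   }
+ */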
+#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif /* __SIMD_COMMON_HEADER__ */
diff --git a/vfsoverlay/logic.h b/vfsoverlay/logic.h
new file mode 100644
index 00000000..0d447a91
--- /dev/null
+++ b/vfsoverlay/logic.h
@@ -0,0 +1,1315 @@
+/*! @header
+ * The interfaces declared in this header provide logical and bitwise
+ * operations on vectors. Some of these functions operate elementwise,
+ * and some produce a scalar result that depends on all lanes of the input.
+ *
+ * For functions returning a boolean value, the return type in C and
+ * Objective-C is _Bool; for C++ it is bool.
+ *
+ * Function                     Result
+ * ------------------------------------------------------------------
+ * simd_all(comparison)         True if and only if the comparison is true
+ *                              in every vector lane. e.g.:
+ *
+ *                              if (simd_all(x == 0.0f)) {
+ *                                // executed if every lane of x
+ *                                // contains zero.
+ *                              }
+ *
+ *                              The precise function of simd_all is to
+ *                              return the high-order bit of the result
+ *                              of a horizontal bitwise AND of all vector
+ *                              lanes.
+ *
+ * simd_any(comparison)         True if and only if the comparison is true
+ *                              in at least one vector lane. e.g.:
+ *
+ *                              if (simd_any(x < 0.0f)) {
+ *                                // executed if any lane of x
+ *                                // contains a negative value.
+ *                              }
+ *
+ *                              The precise function of simd_any is to
+ *                              return the high-order bit of the result
+ *                              of a horizontal bitwise OR of all vector
+ *                              lanes.
+ *
+ * simd_select(x,y,mask)        For each lane in the result, selects the
+ *                              corresponding element of x if the high-
+ *                              order bit of the corresponding element of
+ *                              mask is 0, and the corresponding element
+ *                              of y otherwise.
+ *
+ * simd_bitselect(x,y,mask)     For each bit in the result, selects the
+ *                              corresponding bit of x if the corresponding
+ *                              bit of mask is clear, and the corresponding
+ *                              bit of y otherwise.
+ *
+ * In C++, these functions are available under the simd:: namespace:
+ *
+ * C++ Function                 Equivalent C Function
+ * --------------------------------------------------------------------
+ * simd::all(comparison)        simd_all(comparison)
+ * simd::any(comparison)        simd_any(comparison)
+ * simd::select(x,y,mask)       simd_select(x,y,mask)
+ * simd::bitselect(x,y,mask)    simd_bitselect(x,y,mask)
+ *
+ * @copyright 2014-2017 Apple, Inc. All rights reserved.
+ * @unsorted */
+
+#ifndef SIMD_LOGIC_HEADER
+#define SIMD_LOGIC_HEADER
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/vector_types.h>
+#include <simd/vector_make.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char2 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char3 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char4 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char8 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char16 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char32 x);
+/*! @abstract True if and only if the high-order bit of any lane of the
+ * vector is set. */
+static inline SIMD_CFUNC simd_bool simd_any(simd_char64 x);
+/*!
@abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar32 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uchar64 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_short32 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort32 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. 
*/ +static inline SIMD_CFUNC simd_bool simd_any(simd_int8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_int16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_uint16 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_long8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong2 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong3 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong4 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong8 x); +/*! @abstract True if and only if the high-order bit of any lane of the + * vector is set. + * @discussion Deprecated. Use simd_any instead. */ +#define vector_any simd_any + +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_char64 x); +/*! 
@abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar64 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_short32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort32 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int4 x); +/*! 
@abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_int16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_uint16 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_long8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong2 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong3 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong4 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. */ +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong8 x); +/*! @abstract True if and only if the high-order bit of every lane of the + * vector is set. + * @discussion Deprecated. Use simd_all instead. */ +#define vector_all simd_all + +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float2 simd_select(simd_float2 x, simd_float2 y, simd_int2 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float3 simd_select(simd_float3 x, simd_float3 y, simd_int3 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float4 simd_select(simd_float4 x, simd_float4 y, simd_int4 mask); +/*! 
@abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float8 simd_select(simd_float8 x, simd_float8 y, simd_int8 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_float16 simd_select(simd_float16 x, simd_float16 y, simd_int16 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double2 simd_select(simd_double2 x, simd_double2 y, simd_long2 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double3 simd_select(simd_double3 x, simd_double3 y, simd_long3 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double4 simd_select(simd_double4 x, simd_double4 y, simd_long4 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. */ +static inline SIMD_CFUNC simd_double8 simd_select(simd_double8 x, simd_double8 y, simd_long8 mask); +/*! @abstract For each lane in the result, selects the corresponding element + * of x or y according to whether the high-order bit of the corresponding + * lane of mask is 0 or 1, respectively. + * @discussion Deprecated. Use simd_select instead. */ +#define vector_select simd_select + +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char2 simd_bitselect(simd_char2 x, simd_char2 y, simd_char2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char3 simd_bitselect(simd_char3 x, simd_char3 y, simd_char3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char4 simd_bitselect(simd_char4 x, simd_char4 y, simd_char4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char8 simd_bitselect(simd_char8 x, simd_char8 y, simd_char8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char16 simd_bitselect(simd_char16 x, simd_char16 y, simd_char16 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char32 simd_bitselect(simd_char32 x, simd_char32 y, simd_char32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_char64 simd_bitselect(simd_char64 x, simd_char64 y, simd_char64 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar2 simd_bitselect(simd_uchar2 x, simd_uchar2 y, simd_char2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar3 simd_bitselect(simd_uchar3 x, simd_uchar3 y, simd_char3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar4 simd_bitselect(simd_uchar4 x, simd_uchar4 y, simd_char4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar8 simd_bitselect(simd_uchar8 x, simd_uchar8 y, simd_char8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar16 simd_bitselect(simd_uchar16 x, simd_uchar16 y, simd_char16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar32 simd_bitselect(simd_uchar32 x, simd_uchar32 y, simd_char32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uchar64 simd_bitselect(simd_uchar64 x, simd_uchar64 y, simd_char64 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short2 simd_bitselect(simd_short2 x, simd_short2 y, simd_short2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short3 simd_bitselect(simd_short3 x, simd_short3 y, simd_short3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short4 simd_bitselect(simd_short4 x, simd_short4 y, simd_short4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short8 simd_bitselect(simd_short8 x, simd_short8 y, simd_short8 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short16 simd_bitselect(simd_short16 x, simd_short16 y, simd_short16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_short32 simd_bitselect(simd_short32 x, simd_short32 y, simd_short32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort2 simd_bitselect(simd_ushort2 x, simd_ushort2 y, simd_short2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort3 simd_bitselect(simd_ushort3 x, simd_ushort3 y, simd_short3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort4 simd_bitselect(simd_ushort4 x, simd_ushort4 y, simd_short4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort8 simd_bitselect(simd_ushort8 x, simd_ushort8 y, simd_short8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort16 simd_bitselect(simd_ushort16 x, simd_ushort16 y, simd_short16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_ushort32 simd_bitselect(simd_ushort32 x, simd_ushort32 y, simd_short32 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int2 simd_bitselect(simd_int2 x, simd_int2 y, simd_int2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int3 simd_bitselect(simd_int3 x, simd_int3 y, simd_int3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int4 simd_bitselect(simd_int4 x, simd_int4 y, simd_int4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int8 simd_bitselect(simd_int8 x, simd_int8 y, simd_int8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_int16 simd_bitselect(simd_int16 x, simd_int16 y, simd_int16 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint2 simd_bitselect(simd_uint2 x, simd_uint2 y, simd_int2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint3 simd_bitselect(simd_uint3 x, simd_uint3 y, simd_int3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint4 simd_bitselect(simd_uint4 x, simd_uint4 y, simd_int4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint8 simd_bitselect(simd_uint8 x, simd_uint8 y, simd_int8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_uint16 simd_bitselect(simd_uint16 x, simd_uint16 y, simd_int16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float2 simd_bitselect(simd_float2 x, simd_float2 y, simd_int2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float3 simd_bitselect(simd_float3 x, simd_float3 y, simd_int3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float4 simd_bitselect(simd_float4 x, simd_float4 y, simd_int4 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float8 simd_bitselect(simd_float8 x, simd_float8 y, simd_int8 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_float16 simd_bitselect(simd_float16 x, simd_float16 y, simd_int16 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long2 simd_bitselect(simd_long2 x, simd_long2 y, simd_long2 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long3 simd_bitselect(simd_long3 x, simd_long3 y, simd_long3 mask); +/*! @abstract For each bit in the result, selects the corresponding bit of x + * or y according to whether the corresponding bit of mask is 0 or 1, + * respectively. */ +static inline SIMD_CFUNC simd_long4 simd_bitselect(simd_long4 x, simd_long4 y, simd_long4 mask); +/*! 
@abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_long8 simd_bitselect(simd_long8 x, simd_long8 y, simd_long8 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_ulong2 simd_bitselect(simd_ulong2 x, simd_ulong2 y, simd_long2 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_ulong3 simd_bitselect(simd_ulong3 x, simd_ulong3 y, simd_long3 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_ulong4 simd_bitselect(simd_ulong4 x, simd_ulong4 y, simd_long4 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_ulong8 simd_bitselect(simd_ulong8 x, simd_ulong8 y, simd_long8 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_double2 simd_bitselect(simd_double2 x, simd_double2 y, simd_long2 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_double3 simd_bitselect(simd_double3 x, simd_double3 y, simd_long3 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_double4 simd_bitselect(simd_double4 x, simd_double4 y, simd_long4 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively. */
+static inline SIMD_CFUNC simd_double8 simd_bitselect(simd_double8 x, simd_double8 y, simd_long8 mask);
+/*! @abstract For each bit in the result, selects the corresponding bit of x
+ * or y according to whether the corresponding bit of mask is 0 or 1,
+ * respectively.
+ * @discussion Deprecated. Use simd_bitselect instead. */
+#define vector_bitselect simd_bitselect
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+namespace simd {
+  /*! @abstract True if and only if the high-order bit of every lane is set. */
+  template <typename inttypeN> static SIMD_CPPFUNC simd_bool all(const inttypeN predicate) { return ::simd_all(predicate); }
+  /*! @abstract True if and only if the high-order bit of any lane is set. */
+  template <typename inttypeN> static SIMD_CPPFUNC simd_bool any(const inttypeN predicate) { return ::simd_any(predicate); }
+  /*! @abstract Each lane of the result is selected from the corresponding lane
+   * of x or y according to whether the high-order bit of the corresponding
+   * lane of mask is 0 or 1, respectively. */
+  template <typename fptypeN, typename inttypeN> static SIMD_CPPFUNC fptypeN select(const fptypeN x, const fptypeN y, const inttypeN predicate) { return ::simd_select(x,y,predicate); }
+  /*! @abstract For each bit in the result, selects the corresponding bit of x
+   * or y according to whether the corresponding bit of mask is 0 or 1,
+   * respectively. */
+  template <typename typeN, typename inttypeN> static SIMD_CPPFUNC typeN bitselect(const typeN x, const typeN y, const inttypeN mask) { return ::simd_bitselect(x,y,mask); }
+}
+
+extern "C" {
+#endif /* __cplusplus */
+
+#pragma mark - Implementations
+
+static inline SIMD_CFUNC simd_bool simd_any(simd_char2 x) {
+#if defined __SSE2__
+  return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x3);
+#elif defined __arm64__
+  return simd_any(x.xyxy);
+#else
+  union { uint16_t i; simd_char2 v; } u = { .v = x };
+  return (u.i & 0x8080);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_char3 x) {
+#if defined __SSE2__
+  return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x7);
+#elif defined __arm64__
+  return simd_any(x.xyzz);
+#else
+  union { uint32_t i; simd_char3 v; } u = { .v = x };
+  return (u.i & 0x808080);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_char4 x) {
+#if defined __SSE2__
+  return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xf);
+#elif defined __arm64__
+  return simd_any(x.xyzwxyzw);
+#else
+  union { uint32_t i; simd_char4 v; } u = { .v = x };
+  return (u.i & 0x80808080);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_char8 x) {
+#if defined __SSE2__
+  return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xff);
+#elif defined __arm64__
+  return vmaxv_u8(x) & 0x80;
+#else
+  union { uint64_t i; simd_char8 v; } u = { .v = x };
+  return (u.i & 0x8080808080808080);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_char16 x) {
+#if defined __SSE2__
+  return _mm_movemask_epi8((__m128i)x);
+#elif defined __arm64__
+  return vmaxvq_u8(x) & 0x80;
+#else
+  return simd_any(x.lo | x.hi);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_char32 x) {
+#if defined __AVX2__
+  return _mm256_movemask_epi8(x);
+#else
+  return simd_any(x.lo | x.hi);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_char64 x) {
+  return simd_any(x.lo | x.hi);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar2 x) {
+  return simd_any((simd_char2)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar3 x) {
+  return simd_any((simd_char3)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar4 x) {
+  return simd_any((simd_char4)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar8 x) {
+  return simd_any((simd_char8)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar16 x) {
+  return simd_any((simd_char16)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar32 x) {
+  return simd_any((simd_char32)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_uchar64 x) {
+  return simd_any((simd_char64)x);
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_short2 x) {
+#if defined __SSE2__
+  return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xa);
+#elif defined __arm64__
+  return simd_any(x.xyxy);
+#else
+  union { uint32_t i; simd_short2 v; } u = { .v = x };
+  return (u.i & 0x80008000);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_short3 x) {
+#if defined __SSE2__
+  return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0x2a);
+#elif defined __arm64__
+  return simd_any(x.xyzz);
+#else
+  union { uint64_t i; simd_short3 v; } u = { .v = x };
+  return (u.i & 0x800080008000);
+#endif
+}
+static inline SIMD_CFUNC simd_bool simd_any(simd_short4 x) {
+#if defined __SSE2__
+  return
(_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xaa); +#elif defined __arm64__ + return vmaxv_u16(x) & 0x8000; +#else + union { uint64_t i; simd_short4 v; } u = { .v = x }; + return (u.i & 0x8000800080008000); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)x) & 0xaaaa); +#elif defined __arm64__ + return vmaxvq_u16(x) & 0x8000; +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short16 x) { +#if defined __AVX2__ + return (_mm256_movemask_epi8(x) & 0xaaaaaaaa); +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_short32 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort2 x) { + return simd_any((simd_short2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort3 x) { + return simd_any((simd_short3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort4 x) { + return simd_any((simd_short4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort8 x) { + return simd_any((simd_short8)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort16 x) { + return simd_any((simd_short16)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ushort32 x) { + return simd_any((simd_short32)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int2 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x3); +#elif defined __arm64__ + return vmaxv_u32(x) & 0x80000000; +#else + union { uint64_t i; simd_int2 v; } u = { .v = x }; + return (u.i & 0x8000000080000000); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int3 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x7); +#elif defined __arm64__ + return simd_any(x.xyzz); +#else + return (x.x | x.y | x.z) & 0x80000000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int4 x) { +#if defined __SSE2__ + return _mm_movemask_ps((__m128)x); +#elif defined __arm64__ + return vmaxvq_u32(x) & 0x80000000; +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int8 x) { +#if defined __AVX__ + return _mm256_movemask_ps(x); +#else + return simd_any(x.lo | x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_int16 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint2 x) { + return simd_any((simd_int2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint3 x) { + return simd_any((simd_int3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint4 x) { + return simd_any((simd_int4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint8 x) { + return simd_any((simd_int8)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_uint16 x) { + return simd_any((simd_int16)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long2 x) { +#if defined __SSE2__ + return _mm_movemask_pd((__m128d)x); +#elif defined __arm64__ + return (x.x | x.y) & 0x8000000000000000U; +#else + return (x.x | x.y) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long3 x) { +#if defined __AVX__ + return (_mm256_movemask_pd(simd_make_long4_undef(x)) & 0x7); +#else + return (x.x | x.y | x.z) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_any(simd_long4 x) { +#if defined __AVX__ + return _mm256_movemask_pd(x); +#else + return simd_any(x.lo | x.hi); +#endif +} 
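/*
 * [Illustrative aside, not part of the vendored header or of this patch.]
 * A minimal sketch of how the predicate and selection operations above fit
 * together. Lane-wise comparisons on simd vectors yield integer vectors
 * whose lanes are all-ones (-1) where the comparison holds, which is
 * exactly the mask form simd_select and simd_bitselect expect. The helper
 * name `saturate_to_one` is hypothetical.
 */
#include <simd/simd.h>

static simd_float4 saturate_to_one(simd_float4 v) {
  const simd_float4 one = simd_make_float4(1.0f, 1.0f, 1.0f, 1.0f);
  simd_int4 over = v > one;        /* -1 in each lane where v exceeds 1 */
  if (!simd_any(over)) return v;   /* no lane out of range: nothing to do */
  if (simd_all(over)) return one;  /* every lane out of range: clamp all */
  /* Blend per lane: take `one` wherever the mask's high-order bit is 1.
   * simd_bitselect(v, one, over) computes the same result here, mixing
   * raw bits instead of whole lanes. */
  return simd_select(v, one, over);
}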
+static inline SIMD_CFUNC simd_bool simd_any(simd_long8 x) { + return simd_any(x.lo | x.hi); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong2 x) { + return simd_any((simd_long2)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong3 x) { + return simd_any((simd_long3)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong4 x) { + return simd_any((simd_long4)x); +} +static inline SIMD_CFUNC simd_bool simd_any(simd_ulong8 x) { + return simd_any((simd_long8)x); +} + +static inline SIMD_CFUNC simd_bool simd_all(simd_char2 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x3) == 0x3; +#elif defined __arm64__ + return simd_all(x.xyxy); +#else + union { uint16_t i; simd_char2 v; } u = { .v = x }; + return (u.i & 0x8080) == 0x8080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char3 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0x7) == 0x7; +#elif defined __arm64__ + return simd_all(x.xyzz); +#else + union { uint32_t i; simd_char3 v; } u = { .v = x }; + return (u.i & 0x808080) == 0x808080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char4 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xf) == 0xf; +#elif defined __arm64__ + return simd_all(x.xyzwxyzw); +#else + union { uint32_t i; simd_char4 v; } u = { .v = x }; + return (u.i & 0x80808080) == 0x80808080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_char16_undef(x)) & 0xff) == 0xff; +#elif defined __arm64__ + return vminv_u8(x) & 0x80; +#else + union { uint64_t i; simd_char8 v; } u = { .v = x }; + return (u.i & 0x8080808080808080) == 0x8080808080808080; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char16 x) { +#if defined __SSE2__ + return _mm_movemask_epi8((__m128i)x) == 0xffff; +#elif defined __arm64__ + return vminvq_u8(x) & 0x80; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char32 x) { +#if defined __AVX2__ + return _mm256_movemask_epi8(x) == 0xffffffff; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_char64 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar2 x) { + return simd_all((simd_char2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar3 x) { + return simd_all((simd_char3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar4 x) { + return simd_all((simd_char4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar8 x) { + return simd_all((simd_char8)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar16 x) { + return simd_all((simd_char16)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar32 x) { + return simd_all((simd_char32)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uchar64 x) { + return simd_all((simd_char64)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short2 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xa) == 0xa; +#elif defined __arm64__ + return simd_all(x.xyxy); +#else + union { uint32_t i; simd_short2 v; } u = { .v = x }; + return (u.i & 0x80008000) == 0x80008000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short3 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0x2a) == 0x2a; +#elif defined 
__arm64__ + return simd_all(x.xyzz); +#else + union { uint64_t i; simd_short3 v; } u = { .v = x }; + return (u.i & 0x800080008000) == 0x800080008000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short4 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)simd_make_short8_undef(x)) & 0xaa) == 0xaa; +#elif defined __arm64__ + return vminv_u16(x) & 0x8000; +#else + union { uint64_t i; simd_short4 v; } u = { .v = x }; + return (u.i & 0x8000800080008000) == 0x8000800080008000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short8 x) { +#if defined __SSE2__ + return (_mm_movemask_epi8((__m128i)x) & 0xaaaa) == 0xaaaa; +#elif defined __arm64__ + return vminvq_u16(x) & 0x8000; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short16 x) { +#if defined __AVX2__ + return (_mm256_movemask_epi8(x) & 0xaaaaaaaa) == 0xaaaaaaaa; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_short32 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort2 x) { + return simd_all((simd_short2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort3 x) { + return simd_all((simd_short3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort4 x) { + return simd_all((simd_short4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort8 x) { + return simd_all((simd_short8)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort16 x) { + return simd_all((simd_short16)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ushort32 x) { + return simd_all((simd_short32)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int2 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x3) == 0x3; +#elif defined __arm64__ + return vminv_u32(x) & 0x80000000; +#else + union { uint64_t i; simd_int2 v; } u = { .v = x }; + return (u.i & 0x8000000080000000) == 0x8000000080000000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int3 x) { +#if defined __SSE2__ + return (_mm_movemask_ps((__m128)simd_make_int4_undef(x)) & 0x7) == 0x7; +#elif defined __arm64__ + return simd_all(x.xyzz); +#else + return (x.x & x.y & x.z) & 0x80000000; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int4 x) { +#if defined __SSE2__ + return _mm_movemask_ps((__m128)x) == 0xf; +#elif defined __arm64__ + return vminvq_u32(x) & 0x80000000; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int8 x) { +#if defined __AVX__ + return _mm256_movemask_ps(x) == 0xff; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_int16 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint2 x) { + return simd_all((simd_int2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint3 x) { + return simd_all((simd_int3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint4 x) { + return simd_all((simd_int4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint8 x) { + return simd_all((simd_int8)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_uint16 x) { + return simd_all((simd_int16)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long2 x) { +#if defined __SSE2__ + return _mm_movemask_pd((__m128d)x) == 0x3; +#elif defined __arm64__ + return (x.x & x.y) & 0x8000000000000000U; +#else + return (x.x & x.y) & 0x8000000000000000U; +#endif +} +static inline 
SIMD_CFUNC simd_bool simd_all(simd_long3 x) { +#if defined __AVX__ + return (_mm256_movemask_pd(simd_make_long4_undef(x)) & 0x7) == 0x7; +#else + return (x.x & x.y & x.z) & 0x8000000000000000U; +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long4 x) { +#if defined __AVX__ + return _mm256_movemask_pd(x) == 0xf; +#else + return simd_all(x.lo & x.hi); +#endif +} +static inline SIMD_CFUNC simd_bool simd_all(simd_long8 x) { + return simd_all(x.lo & x.hi); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong2 x) { + return simd_all((simd_long2)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong3 x) { + return simd_all((simd_long3)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong4 x) { + return simd_all((simd_long4)x); +} +static inline SIMD_CFUNC simd_bool simd_all(simd_ulong8 x) { + return simd_all((simd_long8)x); +} + +static inline SIMD_CFUNC simd_float2 simd_select(simd_float2 x, simd_float2 y, simd_int2 mask) { + return simd_make_float2(simd_select(simd_make_float4_undef(x), simd_make_float4_undef(y), simd_make_int4_undef(mask))); +} +static inline SIMD_CFUNC simd_float3 simd_select(simd_float3 x, simd_float3 y, simd_int3 mask) { + return simd_make_float3(simd_select(simd_make_float4_undef(x), simd_make_float4_undef(y), simd_make_int4_undef(mask))); +} +static inline SIMD_CFUNC simd_float4 simd_select(simd_float4 x, simd_float4 y, simd_int4 mask) { +#if defined __SSE4_1__ + return _mm_blendv_ps(x, y, (__m128)mask); +#else + return simd_bitselect(x, y, mask >> 31); +#endif +} +static inline SIMD_CFUNC simd_float8 simd_select(simd_float8 x, simd_float8 y, simd_int8 mask) { +#if defined __AVX__ + return _mm256_blendv_ps(x, y, mask); +#else + return simd_bitselect(x, y, mask >> 31); +#endif +} +static inline SIMD_CFUNC simd_float16 simd_select(simd_float16 x, simd_float16 y, simd_int16 mask) { + return simd_bitselect(x, y, mask >> 31); +} +static inline SIMD_CFUNC simd_double2 simd_select(simd_double2 x, simd_double2 y, simd_long2 mask) { +#if defined __SSE4_1__ + return _mm_blendv_pd(x, y, (__m128d)mask); +#else + return simd_bitselect(x, y, mask >> 63); +#endif +} +static inline SIMD_CFUNC simd_double3 simd_select(simd_double3 x, simd_double3 y, simd_long3 mask) { + return simd_make_double3(simd_select(simd_make_double4_undef(x), simd_make_double4_undef(y), simd_make_long4_undef(mask))); +} +static inline SIMD_CFUNC simd_double4 simd_select(simd_double4 x, simd_double4 y, simd_long4 mask) { +#if defined __AVX__ + return _mm256_blendv_pd(x, y, mask); +#else + return simd_bitselect(x, y, mask >> 63); +#endif +} +static inline SIMD_CFUNC simd_double8 simd_select(simd_double8 x, simd_double8 y, simd_long8 mask) { + return simd_bitselect(x, y, mask >> 63); +} + +static inline SIMD_CFUNC simd_char2 simd_bitselect(simd_char2 x, simd_char2 y, simd_char2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char3 simd_bitselect(simd_char3 x, simd_char3 y, simd_char3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char4 simd_bitselect(simd_char4 x, simd_char4 y, simd_char4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char8 simd_bitselect(simd_char8 x, simd_char8 y, simd_char8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char16 simd_bitselect(simd_char16 x, simd_char16 y, simd_char16 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char32 simd_bitselect(simd_char32 x, simd_char32 y, simd_char32 mask) { + return (x & 
~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_char64 simd_bitselect(simd_char64 x, simd_char64 y, simd_char64 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_uchar2 simd_bitselect(simd_uchar2 x, simd_uchar2 y, simd_char2 mask) { + return (simd_uchar2)simd_bitselect((simd_char2)x, (simd_char2)y, mask); +} +static inline SIMD_CFUNC simd_uchar3 simd_bitselect(simd_uchar3 x, simd_uchar3 y, simd_char3 mask) { + return (simd_uchar3)simd_bitselect((simd_char3)x, (simd_char3)y, mask); +} +static inline SIMD_CFUNC simd_uchar4 simd_bitselect(simd_uchar4 x, simd_uchar4 y, simd_char4 mask) { + return (simd_uchar4)simd_bitselect((simd_char4)x, (simd_char4)y, mask); +} +static inline SIMD_CFUNC simd_uchar8 simd_bitselect(simd_uchar8 x, simd_uchar8 y, simd_char8 mask) { + return (simd_uchar8)simd_bitselect((simd_char8)x, (simd_char8)y, mask); +} +static inline SIMD_CFUNC simd_uchar16 simd_bitselect(simd_uchar16 x, simd_uchar16 y, simd_char16 mask) { + return (simd_uchar16)simd_bitselect((simd_char16)x, (simd_char16)y, mask); +} +static inline SIMD_CFUNC simd_uchar32 simd_bitselect(simd_uchar32 x, simd_uchar32 y, simd_char32 mask) { + return (simd_uchar32)simd_bitselect((simd_char32)x, (simd_char32)y, mask); +} +static inline SIMD_CFUNC simd_uchar64 simd_bitselect(simd_uchar64 x, simd_uchar64 y, simd_char64 mask) { + return (simd_uchar64)simd_bitselect((simd_char64)x, (simd_char64)y, mask); +} +static inline SIMD_CFUNC simd_short2 simd_bitselect(simd_short2 x, simd_short2 y, simd_short2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short3 simd_bitselect(simd_short3 x, simd_short3 y, simd_short3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short4 simd_bitselect(simd_short4 x, simd_short4 y, simd_short4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short8 simd_bitselect(simd_short8 x, simd_short8 y, simd_short8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short16 simd_bitselect(simd_short16 x, simd_short16 y, simd_short16 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_short32 simd_bitselect(simd_short32 x, simd_short32 y, simd_short32 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_ushort2 simd_bitselect(simd_ushort2 x, simd_ushort2 y, simd_short2 mask) { + return (simd_ushort2)simd_bitselect((simd_short2)x, (simd_short2)y, mask); +} +static inline SIMD_CFUNC simd_ushort3 simd_bitselect(simd_ushort3 x, simd_ushort3 y, simd_short3 mask) { + return (simd_ushort3)simd_bitselect((simd_short3)x, (simd_short3)y, mask); +} +static inline SIMD_CFUNC simd_ushort4 simd_bitselect(simd_ushort4 x, simd_ushort4 y, simd_short4 mask) { + return (simd_ushort4)simd_bitselect((simd_short4)x, (simd_short4)y, mask); +} +static inline SIMD_CFUNC simd_ushort8 simd_bitselect(simd_ushort8 x, simd_ushort8 y, simd_short8 mask) { + return (simd_ushort8)simd_bitselect((simd_short8)x, (simd_short8)y, mask); +} +static inline SIMD_CFUNC simd_ushort16 simd_bitselect(simd_ushort16 x, simd_ushort16 y, simd_short16 mask) { + return (simd_ushort16)simd_bitselect((simd_short16)x, (simd_short16)y, mask); +} +static inline SIMD_CFUNC simd_ushort32 simd_bitselect(simd_ushort32 x, simd_ushort32 y, simd_short32 mask) { + return (simd_ushort32)simd_bitselect((simd_short32)x, (simd_short32)y, mask); +} +static inline SIMD_CFUNC simd_int2 simd_bitselect(simd_int2 x, simd_int2 y, simd_int2 mask) { + return (x & ~mask) | (y & mask); 
+} +static inline SIMD_CFUNC simd_int3 simd_bitselect(simd_int3 x, simd_int3 y, simd_int3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int4 simd_bitselect(simd_int4 x, simd_int4 y, simd_int4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int8 simd_bitselect(simd_int8 x, simd_int8 y, simd_int8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_int16 simd_bitselect(simd_int16 x, simd_int16 y, simd_int16 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_uint2 simd_bitselect(simd_uint2 x, simd_uint2 y, simd_int2 mask) { + return (simd_uint2)simd_bitselect((simd_int2)x, (simd_int2)y, mask); +} +static inline SIMD_CFUNC simd_uint3 simd_bitselect(simd_uint3 x, simd_uint3 y, simd_int3 mask) { + return (simd_uint3)simd_bitselect((simd_int3)x, (simd_int3)y, mask); +} +static inline SIMD_CFUNC simd_uint4 simd_bitselect(simd_uint4 x, simd_uint4 y, simd_int4 mask) { + return (simd_uint4)simd_bitselect((simd_int4)x, (simd_int4)y, mask); +} +static inline SIMD_CFUNC simd_uint8 simd_bitselect(simd_uint8 x, simd_uint8 y, simd_int8 mask) { + return (simd_uint8)simd_bitselect((simd_int8)x, (simd_int8)y, mask); +} +static inline SIMD_CFUNC simd_uint16 simd_bitselect(simd_uint16 x, simd_uint16 y, simd_int16 mask) { + return (simd_uint16)simd_bitselect((simd_int16)x, (simd_int16)y, mask); +} +static inline SIMD_CFUNC simd_float2 simd_bitselect(simd_float2 x, simd_float2 y, simd_int2 mask) { + return (simd_float2)simd_bitselect((simd_int2)x, (simd_int2)y, mask); +} +static inline SIMD_CFUNC simd_float3 simd_bitselect(simd_float3 x, simd_float3 y, simd_int3 mask) { + return (simd_float3)simd_bitselect((simd_int3)x, (simd_int3)y, mask); +} +static inline SIMD_CFUNC simd_float4 simd_bitselect(simd_float4 x, simd_float4 y, simd_int4 mask) { + return (simd_float4)simd_bitselect((simd_int4)x, (simd_int4)y, mask); +} +static inline SIMD_CFUNC simd_float8 simd_bitselect(simd_float8 x, simd_float8 y, simd_int8 mask) { + return (simd_float8)simd_bitselect((simd_int8)x, (simd_int8)y, mask); +} +static inline SIMD_CFUNC simd_float16 simd_bitselect(simd_float16 x, simd_float16 y, simd_int16 mask) { + return (simd_float16)simd_bitselect((simd_int16)x, (simd_int16)y, mask); +} +static inline SIMD_CFUNC simd_long2 simd_bitselect(simd_long2 x, simd_long2 y, simd_long2 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_long3 simd_bitselect(simd_long3 x, simd_long3 y, simd_long3 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_long4 simd_bitselect(simd_long4 x, simd_long4 y, simd_long4 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_long8 simd_bitselect(simd_long8 x, simd_long8 y, simd_long8 mask) { + return (x & ~mask) | (y & mask); +} +static inline SIMD_CFUNC simd_ulong2 simd_bitselect(simd_ulong2 x, simd_ulong2 y, simd_long2 mask) { + return (simd_ulong2)simd_bitselect((simd_long2)x, (simd_long2)y, mask); +} +static inline SIMD_CFUNC simd_ulong3 simd_bitselect(simd_ulong3 x, simd_ulong3 y, simd_long3 mask) { + return (simd_ulong3)simd_bitselect((simd_long3)x, (simd_long3)y, mask); +} +static inline SIMD_CFUNC simd_ulong4 simd_bitselect(simd_ulong4 x, simd_ulong4 y, simd_long4 mask) { + return (simd_ulong4)simd_bitselect((simd_long4)x, (simd_long4)y, mask); +} +static inline SIMD_CFUNC simd_ulong8 simd_bitselect(simd_ulong8 x, simd_ulong8 y, simd_long8 mask) { + return (simd_ulong8)simd_bitselect((simd_long8)x, (simd_long8)y, mask); 
+} +static inline SIMD_CFUNC simd_double2 simd_bitselect(simd_double2 x, simd_double2 y, simd_long2 mask) { + return (simd_double2)simd_bitselect((simd_long2)x, (simd_long2)y, mask); +} +static inline SIMD_CFUNC simd_double3 simd_bitselect(simd_double3 x, simd_double3 y, simd_long3 mask) { + return (simd_double3)simd_bitselect((simd_long3)x, (simd_long3)y, mask); +} +static inline SIMD_CFUNC simd_double4 simd_bitselect(simd_double4 x, simd_double4 y, simd_long4 mask) { + return (simd_double4)simd_bitselect((simd_long4)x, (simd_long4)y, mask); +} +static inline SIMD_CFUNC simd_double8 simd_bitselect(simd_double8 x, simd_double8 y, simd_long8 mask) { + return (simd_double8)simd_bitselect((simd_long8)x, (simd_long8)y, mask); +} + +#ifdef __cplusplus +} +#endif +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* __SIMD_LOGIC_HEADER__ */ diff --git a/vfsoverlay/math.h b/vfsoverlay/math.h new file mode 100644 index 00000000..85d51984 --- /dev/null +++ b/vfsoverlay/math.h @@ -0,0 +1,5996 @@ +/*! @header + * The interfaces declared in this header provide elementwise math operations + * on vectors; each lane of the result vector depends only on the data in the + * corresponding lane of the argument(s) to the function. + * + * You should not use the C functions declared in this header directly (these + * are functions with names like `__tg_cos(x)`). These are merely + * implementation details of overloading; instead of calling + * `__tg_cos(x)`, call `cos(x)`. If you are writing C++, use `simd::cos(x)`. + * + * Note that while these vector functions are relatively recent additions, + * scalar fallback is provided for all of them, so they are available even + * when targeting older OS versions. + * + * The following functions are available: + * + * C name C++ name Notes + * ---------------------------------------------------------------------- + * acos(x) simd::acos(x) + * asin(x) simd::asin(x) + * atan(x) simd::atan(x) + * atan2(y,x) simd::atan2(y,x) The argument order matches the scalar + * atan2 function, which gives the angle + * of a line with slope y/x. + * cos(x) simd::cos(x) + * sin(x) simd::sin(x) + * tan(x) simd::tan(x) + * sincos(x) simd::sincos(x) Computes sin(x) and cos(x) more efficiently + * + * cospi(x) simd::cospi(x) Returns cos(pi*x), sin(pi*x), tan(pi*x) + * sinpi(x) simd::sinpi(x) more efficiently and accurately than + * tanpi(x) simd::tanpi(x) would otherwise be possible + * sincospi(x) simd::sincospi(x) Computes sin(pi*x) and cos(pi*x) more efficiently + * + * acosh(x) simd::acosh(x) + * asinh(x) simd::asinh(x) + * atanh(x) simd::atanh(x) + * + * cosh(x) simd::cosh(x) + * sinh(x) simd::sinh(x) + * tanh(x) simd::tanh(x) + * + * exp(x) simd::exp(x) + * exp2(x) simd::exp2(x) + * exp10(x) simd::exp10(x) More efficient than pow(10,x). + * expm1(x) simd::expm1(x) exp(x)-1, accurate even for tiny x. + * + * log(x) simd::log(x) + * log2(x) simd::log2(x) + * log10(x) simd::log10(x) + * log1p(x) simd::log1p(x) log(1+x), accurate even for tiny x.
+ * + * fabs(x) simd::fabs(x) + * cbrt(x) simd::cbrt(x) + * sqrt(x) simd::sqrt(x) + * pow(x,y) simd::pow(x,y) + * copysign(x,y) simd::copysign(x,y) + * hypot(x,y) simd::hypot(x,y) sqrt(x*x + y*y), computed without + * overflow. + * erf(x) simd::erf(x) + * erfc(x) simd::erfc(x) + * tgamma(x) simd::tgamma(x) + * lgamma(x) simd::lgamma(x) + * + * fmod(x,y) simd::fmod(x,y) + * remainder(x,y) simd::remainder(x,y) + * + * ceil(x) simd::ceil(x) + * floor(x) simd::floor(x) + * rint(x) simd::rint(x) + * round(x) simd::round(x) + * trunc(x) simd::trunc(x) + * + * fdim(x,y) simd::fdim(x,y) + * fmax(x,y) simd::fmax(x,y) When one argument to fmin or fmax is + * fmin(x,y) simd::fmin(x,y) constant, use it as the *second* (y) + * argument to get better codegen on some + * architectures. E.g., write fmin(x,2) + * instead of fmin(2,x). + * fma(x,y,z) simd::fma(x,y,z) Fast on arm64 and when targeting AVX2 + * and later; may be quite expensive on + * older hardware. + * simd_muladd(x,y,z) simd::muladd(x,y,z) + * @copyright 2014-2017 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_MATH_HEADER +#define SIMD_MATH_HEADER + +#include <simd/base.h> +#if SIMD_COMPILER_HAS_REQUIRED_FEATURES +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_acos(simd_float2 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_acos(simd_float3 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_acos(simd_float4 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_acos(simd_float8 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_acos(simd_float16 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_acos(simd_double2 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_acos(simd_double3 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_acos(simd_double4 x); +/*! @abstract Do not call this function; instead use `acos` in C and + * Objective-C, and `simd::acos` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_acos(simd_double8 x); + +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_asin(simd_float2 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_asin(simd_float3 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_asin(simd_float4 x);
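/*
 * [Illustrative aside, not part of the vendored header or of this patch.]
 * A minimal sketch of the calling convention the table above describes:
 * call the plain C names (or the simd:: names in C++) and let overloading
 * pick the vector variant; never call the __tg_ functions directly. The
 * helper name `shaped` is hypothetical.
 */
#include <simd/simd.h>

static simd_float4 shaped(simd_float4 x) {
  simd_float4 c = cos(x);  /* overloading resolves to the simd_float4 variant */
  c = fmin(c, 0.5f);       /* constant operand second, per the fmin/fmax note */
  /* fma maps to a single instruction on arm64 and AVX2; simd_muladd lets the
   * compiler fall back to separate multiply and add where fused would be slow. */
  return simd_muladd(c, c, x);  /* c*c + x */
}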
+/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_asin(simd_float8 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_asin(simd_float16 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_asin(simd_double2 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_asin(simd_double3 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_asin(simd_double4 x); +/*! @abstract Do not call this function; instead use `asin` in C and + * Objective-C, and `simd::asin` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_asin(simd_double8 x); + +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_atan(simd_float2 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_atan(simd_float3 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_atan(simd_float4 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_atan(simd_float8 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_atan(simd_float16 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_atan(simd_double2 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_atan(simd_double3 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_atan(simd_double4 x); +/*! @abstract Do not call this function; instead use `atan` in C and + * Objective-C, and `simd::atan` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_atan(simd_double8 x); + +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cos(simd_float2 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cos(simd_float3 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cos(simd_float8 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cos(simd_float16 x); +/*!
@abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cos(simd_double3 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cos(simd_double4 x); +/*! @abstract Do not call this function; instead use `cos` in C and + * Objective-C, and `simd::cos` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cos(simd_double8 x); + +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sin(simd_float2 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sin(simd_float3 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sin(simd_float8 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sin(simd_float16 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sin(simd_double3 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sin(simd_double4 x); +/*! @abstract Do not call this function; instead use `sin` in C and + * Objective-C, and `simd::sin` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sin(simd_double8 x); + +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tan(simd_float2 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tan(simd_float3 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tan(simd_float4 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tan(simd_float8 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tan(simd_float16 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tan(simd_double2 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tan(simd_double3 x); +/*! 
@abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tan(simd_double4 x); +/*! @abstract Do not call this function; instead use `tan` in C and + * Objective-C, and `simd::tan` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tan(simd_double8 x); + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cospi(simd_float2 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cospi(simd_float3 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cospi(simd_float4 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cospi(simd_float8 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cospi(simd_float16 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cospi(simd_double2 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cospi(simd_double3 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cospi(simd_double4 x); +/*! @abstract Do not call this function; instead use `cospi` in C and + * Objective-C, and `simd::cospi` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cospi(simd_double8 x); +#endif + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sinpi(simd_float2 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sinpi(simd_float3 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sinpi(simd_float4 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sinpi(simd_float8 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sinpi(simd_float16 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sinpi(simd_double2 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sinpi(simd_double3 x); +/*! @abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sinpi(simd_double4 x); +/*! 
@abstract Do not call this function; instead use `sinpi` in C and + * Objective-C, and `simd::sinpi` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sinpi(simd_double8 x); +#endif + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tanpi(simd_float2 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tanpi(simd_float3 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tanpi(simd_float4 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tanpi(simd_float8 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tanpi(simd_float16 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tanpi(simd_double2 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tanpi(simd_double3 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tanpi(simd_double4 x); +/*! @abstract Do not call this function; instead use `tanpi` in C and + * Objective-C, and `simd::tanpi` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tanpi(simd_double8 x); +#endif + +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_acosh(simd_float2 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_acosh(simd_float3 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_acosh(simd_float4 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_acosh(simd_float8 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_acosh(simd_float16 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_acosh(simd_double2 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_acosh(simd_double3 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_acosh(simd_double4 x); +/*! @abstract Do not call this function; instead use `acosh` in C and + * Objective-C, and `simd::acosh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_acosh(simd_double8 x); + +/*! 
@abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_asinh(simd_float2 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_asinh(simd_float3 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_asinh(simd_float4 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_asinh(simd_float8 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_asinh(simd_float16 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_asinh(simd_double2 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_asinh(simd_double3 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_asinh(simd_double4 x); +/*! @abstract Do not call this function; instead use `asinh` in C and + * Objective-C, and `simd::asinh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_asinh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_atanh(simd_float2 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_atanh(simd_float3 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_atanh(simd_float4 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_atanh(simd_float8 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_atanh(simd_float16 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_atanh(simd_double2 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_atanh(simd_double3 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_atanh(simd_double4 x); +/*! @abstract Do not call this function; instead use `atanh` in C and + * Objective-C, and `simd::atanh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_atanh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cosh(simd_float2 x); +/*! 
@abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cosh(simd_float3 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cosh(simd_float4 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cosh(simd_float8 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cosh(simd_float16 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cosh(simd_double2 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cosh(simd_double3 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cosh(simd_double4 x); +/*! @abstract Do not call this function; instead use `cosh` in C and + * Objective-C, and `simd::cosh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cosh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_sinh(simd_float2 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sinh(simd_float3 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sinh(simd_float4 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sinh(simd_float8 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sinh(simd_float16 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sinh(simd_double2 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sinh(simd_double3 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sinh(simd_double4 x); +/*! @abstract Do not call this function; instead use `sinh` in C and + * Objective-C, and `simd::sinh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sinh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tanh(simd_float2 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tanh(simd_float3 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. 
*/ +static inline SIMD_CFUNC simd_float4 __tg_tanh(simd_float4 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tanh(simd_float8 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tanh(simd_float16 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tanh(simd_double2 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tanh(simd_double3 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tanh(simd_double4 x); +/*! @abstract Do not call this function; instead use `tanh` in C and + * Objective-C, and `simd::tanh` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tanh(simd_double8 x); + +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_exp(simd_float2 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_exp(simd_float3 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_exp(simd_float4 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_exp(simd_float8 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_exp(simd_float16 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_exp(simd_double2 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_exp(simd_double3 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_exp(simd_double4 x); +/*! @abstract Do not call this function; instead use `exp` in C and + * Objective-C, and `simd::exp` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_exp(simd_double8 x); + +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_exp2(simd_float2 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_exp2(simd_float3 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_exp2(simd_float4 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_exp2(simd_float8 x); +/*! 
@abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_exp2(simd_float16 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_exp2(simd_double2 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_exp2(simd_double3 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_exp2(simd_double4 x); +/*! @abstract Do not call this function; instead use `exp2` in C and + * Objective-C, and `simd::exp2` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_exp2(simd_double8 x); + +#if SIMD_LIBRARY_VERSION >= 1 +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_exp10(simd_float2 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_exp10(simd_float3 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_exp10(simd_float4 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_exp10(simd_float8 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_exp10(simd_float16 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_exp10(simd_double2 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_exp10(simd_double3 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_exp10(simd_double4 x); +/*! @abstract Do not call this function; instead use `exp10` in C and + * Objective-C, and `simd::exp10` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_exp10(simd_double8 x); +#endif + +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_expm1(simd_float2 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_expm1(simd_float3 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_expm1(simd_float4 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_expm1(simd_float8 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_expm1(simd_float16 x); +/*! 
@abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_expm1(simd_double2 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_expm1(simd_double3 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_expm1(simd_double4 x); +/*! @abstract Do not call this function; instead use `expm1` in C and + * Objective-C, and `simd::expm1` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_expm1(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log(simd_float2 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log(simd_float3 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log(simd_float4 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log(simd_float8 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log(simd_float16 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log(simd_double2 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log(simd_double3 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log(simd_double4 x); +/*! @abstract Do not call this function; instead use `log` in C and + * Objective-C, and `simd::log` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log2(simd_float2 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log2(simd_float3 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log2(simd_float4 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log2(simd_float8 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log2(simd_float16 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log2(simd_double2 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. 
*/ +static inline SIMD_CFUNC simd_double3 __tg_log2(simd_double3 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log2(simd_double4 x); +/*! @abstract Do not call this function; instead use `log2` in C and + * Objective-C, and `simd::log2` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log2(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log10(simd_float2 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log10(simd_float3 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log10(simd_float4 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log10(simd_float8 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log10(simd_float16 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log10(simd_double2 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log10(simd_double3 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log10(simd_double4 x); +/*! @abstract Do not call this function; instead use `log10` in C and + * Objective-C, and `simd::log10` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log10(simd_double8 x); + +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_log1p(simd_float2 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_log1p(simd_float3 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_log1p(simd_float4 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_log1p(simd_float8 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_log1p(simd_float16 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_log1p(simd_double2 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_log1p(simd_double3 x); +/*! @abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_log1p(simd_double4 x); +/*! 
@abstract Do not call this function; instead use `log1p` in C and + * Objective-C, and `simd::log1p` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_log1p(simd_double8 x); + +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fabs(simd_float2 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fabs(simd_float3 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fabs(simd_float4 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fabs(simd_float8 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fabs(simd_float16 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fabs(simd_double2 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fabs(simd_double3 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fabs(simd_double4 x); +/*! @abstract Do not call this function; instead use `fabs` in C and + * Objective-C, and `simd::fabs` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fabs(simd_double8 x); + +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_cbrt(simd_float2 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_cbrt(simd_float3 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_cbrt(simd_float4 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_cbrt(simd_float8 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_cbrt(simd_float16 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_cbrt(simd_double2 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_cbrt(simd_double3 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_cbrt(simd_double4 x); +/*! @abstract Do not call this function; instead use `cbrt` in C and + * Objective-C, and `simd::cbrt` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_cbrt(simd_double8 x); + +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. 
*/ +static inline SIMD_CFUNC simd_float2 __tg_sqrt(simd_float2 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_sqrt(simd_float3 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_sqrt(simd_float4 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_sqrt(simd_float8 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_sqrt(simd_float16 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_sqrt(simd_double2 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_sqrt(simd_double3 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_sqrt(simd_double4 x); +/*! @abstract Do not call this function; instead use `sqrt` in C and + * Objective-C, and `simd::sqrt` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_sqrt(simd_double8 x); + +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_erf(simd_float2 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_erf(simd_float3 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_erf(simd_float4 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_erf(simd_float8 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_erf(simd_float16 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_erf(simd_double2 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_erf(simd_double3 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_erf(simd_double4 x); +/*! @abstract Do not call this function; instead use `erf` in C and + * Objective-C, and `simd::erf` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_erf(simd_double8 x); + +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_erfc(simd_float2 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_erfc(simd_float3 x); +/*! 
@abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_erfc(simd_float4 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_erfc(simd_float8 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_erfc(simd_float16 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_erfc(simd_double2 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_erfc(simd_double3 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_erfc(simd_double4 x); +/*! @abstract Do not call this function; instead use `erfc` in C and + * Objective-C, and `simd::erfc` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_erfc(simd_double8 x); + +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_tgamma(simd_float2 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_tgamma(simd_float3 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_tgamma(simd_float4 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_tgamma(simd_float8 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_tgamma(simd_float16 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_tgamma(simd_double2 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_tgamma(simd_double3 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_tgamma(simd_double4 x); +/*! @abstract Do not call this function; instead use `tgamma` in C and + * Objective-C, and `simd::tgamma` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_tgamma(simd_double8 x); + +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_lgamma(simd_float2 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_lgamma(simd_float3 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_lgamma(simd_float4 x); +/*! 
@abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_lgamma(simd_float8 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_lgamma(simd_float16 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_lgamma(simd_double2 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_lgamma(simd_double3 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_lgamma(simd_double4 x); +/*! @abstract Do not call this function; instead use `lgamma` in C and + * Objective-C, and `simd::lgamma` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_lgamma(simd_double8 x); + +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_ceil(simd_float2 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_ceil(simd_float3 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_ceil(simd_float4 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_ceil(simd_float8 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_ceil(simd_float16 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_ceil(simd_double2 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_ceil(simd_double3 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_ceil(simd_double4 x); +/*! @abstract Do not call this function; instead use `ceil` in C and + * Objective-C, and `simd::ceil` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_ceil(simd_double8 x); + +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_floor(simd_float2 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_floor(simd_float3 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_floor(simd_float4 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_floor(simd_float8 x); +/*! 
@abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_floor(simd_float16 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_floor(simd_double2 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_floor(simd_double3 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_floor(simd_double4 x); +/*! @abstract Do not call this function; instead use `floor` in C and + * Objective-C, and `simd::floor` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_floor(simd_double8 x); + +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_rint(simd_float2 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_rint(simd_float3 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_rint(simd_float4 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_rint(simd_float8 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_rint(simd_float16 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_rint(simd_double2 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_rint(simd_double3 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_rint(simd_double4 x); +/*! @abstract Do not call this function; instead use `rint` in C and + * Objective-C, and `simd::rint` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_rint(simd_double8 x); + +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_round(simd_float2 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_round(simd_float3 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_round(simd_float4 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_round(simd_float8 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_round(simd_float16 x); +/*! 
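@discussion [Editorial usage sketch, added in review; not part of Apple's
+ * header.] The rounding functions differ only on halfway cases: under the
+ * default rounding mode, `rint` rounds ties to even while `round` rounds
+ * ties away from zero, and `trunc` always rounds toward zero. For a lane
+ * holding 2.5: floor -> 2, ceil -> 3, trunc -> 2, rint -> 2, round -> 3. */
+static inline simd_float4 example_tie_breaking(simd_float4 x) {
+  simd_float4 to_even = rint(x);    /* honors the current rounding mode */
+  simd_float4 away    = round(x);   /* halfway cases away from zero     */
+  return away - to_even;            /* nonzero only at exact .5 ties    */
+}
+
+/*!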
@abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_round(simd_double2 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_round(simd_double3 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_round(simd_double4 x); +/*! @abstract Do not call this function; instead use `round` in C and + * Objective-C, and `simd::round` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_round(simd_double8 x); + +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_trunc(simd_float2 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_trunc(simd_float3 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_trunc(simd_float4 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_trunc(simd_float8 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_trunc(simd_float16 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_trunc(simd_double2 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_trunc(simd_double3 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_trunc(simd_double4 x); +/*! @abstract Do not call this function; instead use `trunc` in C and + * Objective-C, and `simd::trunc` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_trunc(simd_double8 x); + +#if SIMD_LIBRARY_VERSION >= 5 +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_float2 x, simd_float2 *sinp, simd_float2 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_float3 x, simd_float3 *sinp, simd_float3 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_float4 x, simd_float4 *sinp, simd_float4 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_float8 x, simd_float8 *sinp, simd_float8 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_float16 x, simd_float16 *sinp, simd_float16 *cosp); +/*! 
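@discussion [Editorial usage sketch, added in review; not part of Apple's
+ * header.] `sincos` returns both results through out-pointers in a single
+ * call, which is cheaper than separate `sin` and `cos` evaluations; it is
+ * only declared when SIMD_LIBRARY_VERSION >= 5, per the guard above. */
+static inline simd_float4 example_sincos(simd_float4 angle) {
+  simd_float4 s, c;
+  sincos(angle, &s, &c);    /* fills sine and cosine elementwise  */
+  return s*s + c*c;         /* identity: approximately 1 per lane */
+}
+
+/*!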
@abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_double2 x, simd_double2 *sinp, simd_double2 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_double3 x, simd_double3 *sinp, simd_double3 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_double4 x, simd_double4 *sinp, simd_double4 *cosp); +/*! @abstract Do not call this function; instead use `sincos` in C and + * Objective-C, and `simd::sincos` in C++. */ +static inline SIMD_NONCONST void __tg_sincos(simd_double8 x, simd_double8 *sinp, simd_double8 *cosp); + +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_float2 x, simd_float2 *sinp, simd_float2 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_float3 x, simd_float3 *sinp, simd_float3 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_float4 x, simd_float4 *sinp, simd_float4 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_float8 x, simd_float8 *sinp, simd_float8 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_float16 x, simd_float16 *sinp, simd_float16 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_double2 x, simd_double2 *sinp, simd_double2 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_double3 x, simd_double3 *sinp, simd_double3 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_double4 x, simd_double4 *sinp, simd_double4 *cosp); +/*! @abstract Do not call this function; instead use `sincospi` in C and + * Objective-C, and `simd::sincospi` in C++. */ +static inline SIMD_NONCONST void __tg_sincospi(simd_double8 x, simd_double8 *sinp, simd_double8 *cosp); + +#endif +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_int2 __tg_isfinite(simd_float2 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_int3 __tg_isfinite(simd_float3 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_int4 __tg_isfinite(simd_float4 x); +/*! 
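@discussion [Editorial usage sketch, added in review; not part of Apple's
+ * header.] The classification functions return integer vectors of the same
+ * lane count: a lane is all ones (-1) where the predicate holds and 0 where
+ * it does not, so the result can feed a bitwise select. The sketch assumes
+ * `simd_bitselect` from the companion <simd/logic.h> header. */
+static inline simd_float4 example_keep_finite(simd_float4 x) {
+  simd_int4 finite = isfinite(x);          /* -1 in finite lanes, else 0 */
+  simd_float4 zero = 0;                    /* scalar splats across lanes */
+  return simd_bitselect(zero, x, finite);  /* finite lanes keep x        */
+}
+
+/*!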
@abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_int8 __tg_isfinite(simd_float8 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_int16 __tg_isfinite(simd_float16 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_long2 __tg_isfinite(simd_double2 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_long3 __tg_isfinite(simd_double3 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_long4 __tg_isfinite(simd_double4 x); +/*! @abstract Do not call this function; instead use `isfinite` in C and + * Objective-C, and `simd::isfinite` in C++. */ +static inline SIMD_CFUNC simd_long8 __tg_isfinite(simd_double8 x); + +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_int2 __tg_isinf(simd_float2 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_int3 __tg_isinf(simd_float3 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_int4 __tg_isinf(simd_float4 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_int8 __tg_isinf(simd_float8 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_int16 __tg_isinf(simd_float16 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_long2 __tg_isinf(simd_double2 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_long3 __tg_isinf(simd_double3 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_long4 __tg_isinf(simd_double4 x); +/*! @abstract Do not call this function; instead use `isinf` in C and + * Objective-C, and `simd::isinf` in C++. */ +static inline SIMD_CFUNC simd_long8 __tg_isinf(simd_double8 x); + +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_int2 __tg_isnan(simd_float2 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_int3 __tg_isnan(simd_float3 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_int4 __tg_isnan(simd_float4 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_int8 __tg_isnan(simd_float8 x); +/*! 
@abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_int16 __tg_isnan(simd_float16 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_long2 __tg_isnan(simd_double2 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_long3 __tg_isnan(simd_double3 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_long4 __tg_isnan(simd_double4 x); +/*! @abstract Do not call this function; instead use `isnan` in C and + * Objective-C, and `simd::isnan` in C++. */ +static inline SIMD_CFUNC simd_long8 __tg_isnan(simd_double8 x); + +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_int2 __tg_isnormal(simd_float2 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_int3 __tg_isnormal(simd_float3 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_int4 __tg_isnormal(simd_float4 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_int8 __tg_isnormal(simd_float8 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_int16 __tg_isnormal(simd_float16 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_long2 __tg_isnormal(simd_double2 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_long3 __tg_isnormal(simd_double3 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_long4 __tg_isnormal(simd_double4 x); +/*! @abstract Do not call this function; instead use `isnormal` in C and + * Objective-C, and `simd::isnormal` in C++. */ +static inline SIMD_CFUNC simd_long8 __tg_isnormal(simd_double8 x); + + +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_atan2(simd_float2 y, simd_float2 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_atan2(simd_float3 y, simd_float3 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_atan2(simd_float4 y, simd_float4 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_atan2(simd_float8 y, simd_float8 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. 
*/ +static inline SIMD_CFUNC simd_float16 __tg_atan2(simd_float16 y, simd_float16 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_atan2(simd_double2 y, simd_double2 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_atan2(simd_double3 y, simd_double3 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_atan2(simd_double4 y, simd_double4 x); +/*! @abstract Do not call this function; instead use `atan2` in C and + * Objective-C, and `simd::atan2` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_atan2(simd_double8 y, simd_double8 x); + +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_hypot(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_hypot(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_hypot(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_hypot(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_hypot(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_hypot(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_hypot(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_hypot(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `hypot` in C and + * Objective-C, and `simd::hypot` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_hypot(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_pow(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_pow(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_pow(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_pow(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. 
*/ +static inline SIMD_CFUNC simd_float16 __tg_pow(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_pow(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_pow(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_pow(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `pow` in C and + * Objective-C, and `simd::pow` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_pow(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fmod(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fmod(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fmod(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fmod(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fmod(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fmod(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fmod(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fmod(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fmod` in C and + * Objective-C, and `simd::fmod` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fmod(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_remainder(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_remainder(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_remainder(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_remainder(simd_float8 x, simd_float8 y); +/*! 
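@discussion [Editorial usage sketch, added in review; not part of Apple's
+ * header.] The two-argument functions are elementwise as well, so four
+ * (x, y) pairs convert to polar form in two calls, with no per-lane loop;
+ * the function name below is the editor's. */
+static inline void example_to_polar(simd_float4 x, simd_float4 y,
+                                    simd_float4 *r, simd_float4 *theta) {
+  *r     = hypot(x, y);   /* sqrt(x*x + y*y) without spurious overflow */
+  *theta = atan2(y, x);   /* quadrant-correct angle in each lane       */
+}
+
+/*!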
@abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_remainder(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_remainder(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_remainder(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_remainder(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `remainder` in C and + * Objective-C, and `simd::remainder` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_remainder(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_copysign(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_copysign(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_copysign(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_copysign(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_copysign(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_copysign(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_copysign(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_copysign(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `copysign` in C and + * Objective-C, and `simd::copysign` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_copysign(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_nextafter(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_nextafter(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. 
*/ +static inline SIMD_CFUNC simd_float4 __tg_nextafter(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_nextafter(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_nextafter(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_nextafter(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_nextafter(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_nextafter(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `nextafter` in C and + * Objective-C, and `simd::nextafter` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_nextafter(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fdim(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fdim(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fdim(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fdim(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fdim(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fdim(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fdim(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fdim(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fdim` in C and + * Objective-C, and `simd::fdim` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fdim(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fmax(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fmax(simd_float3 x, simd_float3 y); +/*! 
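@discussion [Editorial usage sketch, added in review; not part of Apple's
+ * header.] Like their C library scalar counterparts, `fmin` and `fmax`
+ * return the other operand when exactly one operand is NaN, which makes
+ * them convenient for clamping possibly-NaN data (NaN lanes land on the
+ * lower bound here). */
+static inline simd_float4 example_clamp(simd_float4 x,
+                                        simd_float4 lo, simd_float4 hi) {
+  return fmin(fmax(x, lo), hi);   /* elementwise clamp of each lane */
+}
+
+/*!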
@abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fmax(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fmax(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fmax(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fmax(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fmax(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fmax(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fmax` in C and + * Objective-C, and `simd::fmax` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fmax(simd_double8 x, simd_double8 y); + +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fmin(simd_float2 x, simd_float2 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fmin(simd_float3 x, simd_float3 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fmin(simd_float4 x, simd_float4 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fmin(simd_float8 x, simd_float8 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fmin(simd_float16 x, simd_float16 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fmin(simd_double2 x, simd_double2 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fmin(simd_double3 x, simd_double3 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fmin(simd_double4 x, simd_double4 y); +/*! @abstract Do not call this function; instead use `fmin` in C and + * Objective-C, and `simd::fmin` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fmin(simd_double8 x, simd_double8 y); + + +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float2 __tg_fma(simd_float2 x, simd_float2 y, simd_float2 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float3 __tg_fma(simd_float3 x, simd_float3 y, simd_float3 z); +/*! 
@abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float4 __tg_fma(simd_float4 x, simd_float4 y, simd_float4 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float8 __tg_fma(simd_float8 x, simd_float8 y, simd_float8 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_float16 __tg_fma(simd_float16 x, simd_float16 y, simd_float16 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double2 __tg_fma(simd_double2 x, simd_double2 y, simd_double2 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double3 __tg_fma(simd_double3 x, simd_double3 y, simd_double3 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double4 __tg_fma(simd_double4 x, simd_double4 y, simd_double4 z); +/*! @abstract Do not call this function; instead use `fma` in C and Objective-C, + * and `simd::fma` in C++. */ +static inline SIMD_CFUNC simd_double8 __tg_fma(simd_double8 x, simd_double8 y, simd_double8 z); + +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC float simd_muladd(float x, float y, float z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float2 simd_muladd(simd_float2 x, simd_float2 y, simd_float2 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float3 simd_muladd(simd_float3 x, simd_float3 y, simd_float3 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float4 simd_muladd(simd_float4 x, simd_float4 y, simd_float4 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float8 simd_muladd(simd_float8 x, simd_float8 y, simd_float8 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_float16 simd_muladd(simd_float16 x, simd_float16 y, simd_float16 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC double simd_muladd(double x, double y, double z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double2 simd_muladd(simd_double2 x, simd_double2 y, simd_double2 z); +/*! 
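@discussion [Editorial usage sketch, added in review; not part of Apple's
+ * header.] `fma` guarantees a single rounding (a true fused multiply-add,
+ * emulated in software where no FMA hardware exists), while `simd_muladd`
+ * promises only the fastest available evaluation of z + x*y (`z` is the
+ * accumulator the docs call "accum") and may round twice. Prefer
+ * `simd_muladd` unless the algorithm depends on single rounding. */
+static inline simd_float4 example_axpy(simd_float4 a, simd_float4 x,
+                                       simd_float4 y) {
+  return simd_muladd(a, x, y);   /* a*x + y by the fastest available path */
+}
+
+/*!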
@abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double3 simd_muladd(simd_double3 x, simd_double3 y, simd_double3 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double4 simd_muladd(simd_double4 x, simd_double4 y, simd_double4 z); +/*! @abstract Computes accum + x*y by the most efficient means available; + * either a fused multiply add or separate multiply and add instructions. */ +static inline SIMD_CFUNC simd_double8 simd_muladd(simd_double8 x, simd_double8 y, simd_double8 z); + +#ifdef __cplusplus +} /* extern "C" */ + +#include <cmath> +/*! @abstract Do not call this function directly; use simd::acos instead. */ +static SIMD_CPPFUNC float __tg_acos(float x) { return ::acosf(x); } +/*! @abstract Do not call this function directly; use simd::acos instead. */ +static SIMD_CPPFUNC double __tg_acos(double x) { return ::acos(x); } +/*! @abstract Do not call this function directly; use simd::asin instead. */ +static SIMD_CPPFUNC float __tg_asin(float x) { return ::asinf(x); } +/*! @abstract Do not call this function directly; use simd::asin instead. */ +static SIMD_CPPFUNC double __tg_asin(double x) { return ::asin(x); } +/*! @abstract Do not call this function directly; use simd::atan instead. */ +static SIMD_CPPFUNC float __tg_atan(float x) { return ::atanf(x); } +/*! @abstract Do not call this function directly; use simd::atan instead. */ +static SIMD_CPPFUNC double __tg_atan(double x) { return ::atan(x); } +/*! @abstract Do not call this function directly; use simd::cos instead. */ +static SIMD_CPPFUNC float __tg_cos(float x) { return ::cosf(x); } +/*! @abstract Do not call this function directly; use simd::cos instead. */ +static SIMD_CPPFUNC double __tg_cos(double x) { return ::cos(x); } +/*! @abstract Do not call this function directly; use simd::sin instead. */ +static SIMD_CPPFUNC float __tg_sin(float x) { return ::sinf(x); } +/*! @abstract Do not call this function directly; use simd::sin instead. */ +static SIMD_CPPFUNC double __tg_sin(double x) { return ::sin(x); } +/*! @abstract Do not call this function directly; use simd::tan instead. */ +static SIMD_CPPFUNC float __tg_tan(float x) { return ::tanf(x); } +/*! @abstract Do not call this function directly; use simd::tan instead. */ +static SIMD_CPPFUNC double __tg_tan(double x) { return ::tan(x); } +/*! @abstract Do not call this function directly; use simd::cospi instead. */ +static SIMD_CPPFUNC float __tg_cospi(float x) { return ::__cospif(x); } +/*! @abstract Do not call this function directly; use simd::cospi instead. */ +static SIMD_CPPFUNC double __tg_cospi(double x) { return ::__cospi(x); } +/*! @abstract Do not call this function directly; use simd::sinpi instead. */ +static SIMD_CPPFUNC float __tg_sinpi(float x) { return ::__sinpif(x); } +/*! @abstract Do not call this function directly; use simd::sinpi instead. */ +static SIMD_CPPFUNC double __tg_sinpi(double x) { return ::__sinpi(x); } +/*! @abstract Do not call this function directly; use simd::tanpi instead. */ +static SIMD_CPPFUNC float __tg_tanpi(float x) { return ::__tanpif(x); } +/*! @abstract Do not call this function directly; use simd::tanpi instead. */ +static SIMD_CPPFUNC double __tg_tanpi(double x) { return ::__tanpi(x); } +/*! @abstract Do not call this function directly; use simd::acosh instead.
*/ +static SIMD_CPPFUNC float __tg_acosh(float x) { return ::acoshf(x); } +/*! @abstract Do not call this function directly; use simd::acosh instead. */ +static SIMD_CPPFUNC double __tg_acosh(double x) { return ::acosh(x); } +/*! @abstract Do not call this function directly; use simd::asinh instead. */ +static SIMD_CPPFUNC float __tg_asinh(float x) { return ::asinhf(x); } +/*! @abstract Do not call this function directly; use simd::asinh instead. */ +static SIMD_CPPFUNC double __tg_asinh(double x) { return ::asinh(x); } +/*! @abstract Do not call this function directly; use simd::atanh instead. */ +static SIMD_CPPFUNC float __tg_atanh(float x) { return ::atanhf(x); } +/*! @abstract Do not call this function directly; use simd::atanh instead. */ +static SIMD_CPPFUNC double __tg_atanh(double x) { return ::atanh(x); } +/*! @abstract Do not call this function directly; use simd::cosh instead. */ +static SIMD_CPPFUNC float __tg_cosh(float x) { return ::coshf(x); } +/*! @abstract Do not call this function directly; use simd::cosh instead. */ +static SIMD_CPPFUNC double __tg_cosh(double x) { return ::cosh(x); } +/*! @abstract Do not call this function directly; use simd::sinh instead. */ +static SIMD_CPPFUNC float __tg_sinh(float x) { return ::sinhf(x); } +/*! @abstract Do not call this function directly; use simd::sinh instead. */ +static SIMD_CPPFUNC double __tg_sinh(double x) { return ::sinh(x); } +/*! @abstract Do not call this function directly; use simd::tanh instead. */ +static SIMD_CPPFUNC float __tg_tanh(float x) { return ::tanhf(x); } +/*! @abstract Do not call this function directly; use simd::tanh instead. */ +static SIMD_CPPFUNC double __tg_tanh(double x) { return ::tanh(x); } +/*! @abstract Do not call this function directly; use simd::exp instead. */ +static SIMD_CPPFUNC float __tg_exp(float x) { return ::expf(x); } +/*! @abstract Do not call this function directly; use simd::exp instead. */ +static SIMD_CPPFUNC double __tg_exp(double x) { return ::exp(x); } +/*! @abstract Do not call this function directly; use simd::exp2 instead. */ +static SIMD_CPPFUNC float __tg_exp2(float x) { return ::exp2f(x); } +/*! @abstract Do not call this function directly; use simd::exp2 instead. */ +static SIMD_CPPFUNC double __tg_exp2(double x) { return ::exp2(x); } +/*! @abstract Do not call this function directly; use simd::exp10 instead. */ +static SIMD_CPPFUNC float __tg_exp10(float x) { return ::__exp10f(x); } +/*! @abstract Do not call this function directly; use simd::exp10 instead. */ +static SIMD_CPPFUNC double __tg_exp10(double x) { return ::__exp10(x); } +/*! @abstract Do not call this function directly; use simd::expm1 instead. */ +static SIMD_CPPFUNC float __tg_expm1(float x) { return ::expm1f(x); } +/*! @abstract Do not call this function directly; use simd::expm1 instead. */ +static SIMD_CPPFUNC double __tg_expm1(double x) { return ::expm1(x); } +/*! @abstract Do not call this function directly; use simd::log instead. */ +static SIMD_CPPFUNC float __tg_log(float x) { return ::logf(x); } +/*! @abstract Do not call this function directly; use simd::log instead. */ +static SIMD_CPPFUNC double __tg_log(double x) { return ::log(x); } +/*! @abstract Do not call this function directly; use simd::log2 instead. */ +static SIMD_CPPFUNC float __tg_log2(float x) { return ::log2f(x); } +/*! @abstract Do not call this function directly; use simd::log2 instead. */ +static SIMD_CPPFUNC double __tg_log2(double x) { return ::log2(x); } +/*! 
@abstract Do not call this function directly; use simd::log10 instead. */ +static SIMD_CPPFUNC float __tg_log10(float x) { return ::log10f(x); } +/*! @abstract Do not call this function directly; use simd::log10 instead. */ +static SIMD_CPPFUNC double __tg_log10(double x) { return ::log10(x); } +/*! @abstract Do not call this function directly; use simd::log1p instead. */ +static SIMD_CPPFUNC float __tg_log1p(float x) { return ::log1pf(x); } +/*! @abstract Do not call this function directly; use simd::log1p instead. */ +static SIMD_CPPFUNC double __tg_log1p(double x) { return ::log1p(x); } +/*! @abstract Do not call this function directly; use simd::fabs instead. */ +static SIMD_CPPFUNC float __tg_fabs(float x) { return ::fabsf(x); } +/*! @abstract Do not call this function directly; use simd::fabs instead. */ +static SIMD_CPPFUNC double __tg_fabs(double x) { return ::fabs(x); } +/*! @abstract Do not call this function directly; use simd::cbrt instead. */ +static SIMD_CPPFUNC float __tg_cbrt(float x) { return ::cbrtf(x); } +/*! @abstract Do not call this function directly; use simd::cbrt instead. */ +static SIMD_CPPFUNC double __tg_cbrt(double x) { return ::cbrt(x); } +/*! @abstract Do not call this function directly; use simd::sqrt instead. */ +static SIMD_CPPFUNC float __tg_sqrt(float x) { return ::sqrtf(x); } +/*! @abstract Do not call this function directly; use simd::sqrt instead. */ +static SIMD_CPPFUNC double __tg_sqrt(double x) { return ::sqrt(x); } +/*! @abstract Do not call this function directly; use simd::erf instead. */ +static SIMD_CPPFUNC float __tg_erf(float x) { return ::erff(x); } +/*! @abstract Do not call this function directly; use simd::erf instead. */ +static SIMD_CPPFUNC double __tg_erf(double x) { return ::erf(x); } +/*! @abstract Do not call this function directly; use simd::erfc instead. */ +static SIMD_CPPFUNC float __tg_erfc(float x) { return ::erfcf(x); } +/*! @abstract Do not call this function directly; use simd::erfc instead. */ +static SIMD_CPPFUNC double __tg_erfc(double x) { return ::erfc(x); } +/*! @abstract Do not call this function directly; use simd::tgamma instead. */ +static SIMD_CPPFUNC float __tg_tgamma(float x) { return ::tgammaf(x); } +/*! @abstract Do not call this function directly; use simd::tgamma instead. */ +static SIMD_CPPFUNC double __tg_tgamma(double x) { return ::tgamma(x); } +/*! @abstract Do not call this function directly; use simd::lgamma instead. */ +static SIMD_CPPFUNC float __tg_lgamma(float x) { return ::lgammaf(x); } +/*! @abstract Do not call this function directly; use simd::lgamma instead. */ +static SIMD_CPPFUNC double __tg_lgamma(double x) { return ::lgamma(x); } +/*! @abstract Do not call this function directly; use simd::ceil instead. */ +static SIMD_CPPFUNC float __tg_ceil(float x) { return ::ceilf(x); } +/*! @abstract Do not call this function directly; use simd::ceil instead. */ +static SIMD_CPPFUNC double __tg_ceil(double x) { return ::ceil(x); } +/*! @abstract Do not call this function directly; use simd::floor instead. */ +static SIMD_CPPFUNC float __tg_floor(float x) { return ::floorf(x); } +/*! @abstract Do not call this function directly; use simd::floor instead. */ +static SIMD_CPPFUNC double __tg_floor(double x) { return ::floor(x); } +/*! @abstract Do not call this function directly; use simd::rint instead. */ +static SIMD_CPPFUNC float __tg_rint(float x) { return ::rintf(x); } +/*! @abstract Do not call this function directly; use simd::rint instead. 
*/ +static SIMD_CPPFUNC double __tg_rint(double x) { return ::rint(x); } +/*! @abstract Do not call this function directly; use simd::round instead. */ +static SIMD_CPPFUNC float __tg_round(float x) { return ::roundf(x); } +/*! @abstract Do not call this function directly; use simd::round instead. */ +static SIMD_CPPFUNC double __tg_round(double x) { return ::round(x); } +/*! @abstract Do not call this function directly; use simd::trunc instead. */ +static SIMD_CPPFUNC float __tg_trunc(float x) { return ::truncf(x); } +/*! @abstract Do not call this function directly; use simd::trunc instead. */ +static SIMD_CPPFUNC double __tg_trunc(double x) { return ::trunc(x); } +#if SIMD_LIBRARY_VERSION >= 5 +/*! @abstract Do not call this function directly; use simd::sincos instead. */ +static SIMD_INLINE SIMD_NODEBUG void __tg_sincos(float x, float *sinp, float *cosp) { ::__sincosf(x, sinp, cosp); } +/*! @abstract Do not call this function directly; use simd::sincos instead. */ +static SIMD_INLINE SIMD_NODEBUG void __tg_sincos(double x, double *sinp, double *cosp) { ::__sincos(x, sinp, cosp); } +/*! @abstract Do not call this function directly; use simd::sincospi + * instead. */ +static SIMD_INLINE SIMD_NODEBUG void __tg_sincospi(float x, float *sinp, float *cosp) { ::__sincospif(x, sinp, cosp); } +/*! @abstract Do not call this function directly; use simd::sincospi + * instead. */ +static SIMD_INLINE SIMD_NODEBUG void __tg_sincospi(double x, double *sinp, double *cosp) { ::__sincospi(x, sinp, cosp); } +#endif +/*! @abstract Do not call this function directly; use simd::isfinite + * instead. */ +static SIMD_CPPFUNC float __tg_isfinite(float x) { return ::isfinite(x); } +/*! @abstract Do not call this function directly; use simd::isfinite + * instead. */ +static SIMD_CPPFUNC double __tg_isfinite(double x) { return ::isfinite(x); } +/*! @abstract Do not call this function directly; use simd::isinf instead. */ +static SIMD_CPPFUNC float __tg_isinf(float x) { return ::isinf(x); } +/*! @abstract Do not call this function directly; use simd::isinf instead. */ +static SIMD_CPPFUNC double __tg_isinf(double x) { return ::isinf(x); } +/*! @abstract Do not call this function directly; use simd::isnan instead. */ +static SIMD_CPPFUNC float __tg_isnan(float x) { return ::isnan(x); } +/*! @abstract Do not call this function directly; use simd::isnan instead. */ +static SIMD_CPPFUNC double __tg_isnan(double x) { return ::isnan(x); } +/*! @abstract Do not call this function directly; use simd::isnormal + * instead. */ +static SIMD_CPPFUNC float __tg_isnormal(float x) { return ::isnormal(x); } +/*! @abstract Do not call this function directly; use simd::isnormal + * instead. */ +static SIMD_CPPFUNC double __tg_isnormal(double x) { return ::isnormal(x); } +/*! @abstract Do not call this function directly; use simd::atan2 instead. */ +static SIMD_CPPFUNC float __tg_atan2(float x, float y) { return ::atan2f(x, y); } +/*! @abstract Do not call this function directly; use simd::atan2 instead. */ +static SIMD_CPPFUNC double __tg_atan2(double x, double y) { return ::atan2(x, y); } +/*! @abstract Do not call this function directly; use simd::hypot instead. */ +static SIMD_CPPFUNC float __tg_hypot(float x, float y) { return ::hypotf(x, y); } +/*! @abstract Do not call this function directly; use simd::hypot instead. */ +static SIMD_CPPFUNC double __tg_hypot(double x, double y) { return ::hypot(x, y); } +/*! @abstract Do not call this function directly; use simd::pow instead. 
*/
+static SIMD_CPPFUNC float __tg_pow(float x, float y) { return ::powf(x, y); }
+/*! @abstract Do not call this function directly; use simd::pow instead. */
+static SIMD_CPPFUNC double __tg_pow(double x, double y) { return ::pow(x, y); }
+/*! @abstract Do not call this function directly; use simd::fmod instead. */
+static SIMD_CPPFUNC float __tg_fmod(float x, float y) { return ::fmodf(x, y); }
+/*! @abstract Do not call this function directly; use simd::fmod instead. */
+static SIMD_CPPFUNC double __tg_fmod(double x, double y) { return ::fmod(x, y); }
+/*! @abstract Do not call this function directly; use simd::remainder
+ * instead. */
+static SIMD_CPPFUNC float __tg_remainder(float x, float y) { return ::remainderf(x, y); }
+/*! @abstract Do not call this function directly; use simd::remainder
+ * instead. */
+static SIMD_CPPFUNC double __tg_remainder(double x, double y) { return ::remainder(x, y); }
+/*! @abstract Do not call this function directly; use simd::copysign
+ * instead. */
+static SIMD_CPPFUNC float __tg_copysign(float x, float y) { return ::copysignf(x, y); }
+/*! @abstract Do not call this function directly; use simd::copysign
+ * instead. */
+static SIMD_CPPFUNC double __tg_copysign(double x, double y) { return ::copysign(x, y); }
+/*! @abstract Do not call this function directly; use simd::nextafter
+ * instead. */
+static SIMD_CPPFUNC float __tg_nextafter(float x, float y) { return ::nextafterf(x, y); }
+/*! @abstract Do not call this function directly; use simd::nextafter
+ * instead. */
+static SIMD_CPPFUNC double __tg_nextafter(double x, double y) { return ::nextafter(x, y); }
+/*! @abstract Do not call this function directly; use simd::fdim instead. */
+static SIMD_CPPFUNC float __tg_fdim(float x, float y) { return ::fdimf(x, y); }
+/*! @abstract Do not call this function directly; use simd::fdim instead. */
+static SIMD_CPPFUNC double __tg_fdim(double x, double y) { return ::fdim(x, y); }
+/*! @abstract Do not call this function directly; use simd::fmax instead. */
+static SIMD_CPPFUNC float __tg_fmax(float x, float y) { return ::fmaxf(x, y); }
+/*! @abstract Do not call this function directly; use simd::fmax instead. */
+static SIMD_CPPFUNC double __tg_fmax(double x, double y) { return ::fmax(x, y); }
+/*! @abstract Do not call this function directly; use simd::fmin instead. */
+static SIMD_CPPFUNC float __tg_fmin(float x, float y) { return ::fminf(x, y); }
+/*! @abstract Do not call this function directly; use simd::fmin instead. */
+static SIMD_CPPFUNC double __tg_fmin(double x, double y) { return ::fmin(x, y); }
+/*! @abstract Do not call this function directly; use simd::fma instead. */
+static SIMD_CPPFUNC float __tg_fma(float x, float y, float z) { return ::fmaf(x, y, z); }
+/*! @abstract Do not call this function directly; use simd::fma instead. */
+static SIMD_CPPFUNC double __tg_fma(double x, double y, double z) { return ::fma(x, y, z); }
+
+namespace simd {
+/*! @abstract Generalizes the function acos to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN acos(fptypeN x) { return ::__tg_acos(x); }
+
+/*! @abstract Generalizes the function asin to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN asin(fptypeN x) { return ::__tg_asin(x); }
+
+/*! @abstract Generalizes the function atan to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN atan(fptypeN x) { return ::__tg_atan(x); }
+
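These templates give one spelling, simd::NAME, for every scalar and vector type; the C branch later in this header reaches the same __tg_ kernels through the <tgmath.h> piggyback, so plain C code can call the standard names on vectors. A sketch of the C-side call, assuming that macro machinery applies (illustrative only; the function name is invented):

    #include <simd/simd.h>

    void angles(void) {
        simd_float4 x = simd_make_float4(-1.0f, 0.0f, 0.5f, 1.0f);
        simd_float4 a = acos(x); /* per-lane arc cosine: {pi, pi/2, pi/3, 0} */
        (void)a;
    }

+/*!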
@abstract Generalizes the function cos to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN cos(fptypeN x) { return ::__tg_cos(x); }
+
+/*! @abstract Generalizes the function sin to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN sin(fptypeN x) { return ::__tg_sin(x); }
+
+/*! @abstract Generalizes the function tan to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN tan(fptypeN x) { return ::__tg_tan(x); }
+
+#if SIMD_LIBRARY_VERSION >= 1
+/*! @abstract Generalizes the function cospi to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN cospi(fptypeN x) { return ::__tg_cospi(x); }
+#endif
+
+#if SIMD_LIBRARY_VERSION >= 1
+/*! @abstract Generalizes the function sinpi to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN sinpi(fptypeN x) { return ::__tg_sinpi(x); }
+#endif
+
+#if SIMD_LIBRARY_VERSION >= 1
+/*! @abstract Generalizes the function tanpi to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN tanpi(fptypeN x) { return ::__tg_tanpi(x); }
+#endif
+
+/*! @abstract Generalizes the function acosh to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN acosh(fptypeN x) { return ::__tg_acosh(x); }
+
+/*! @abstract Generalizes the function asinh to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN asinh(fptypeN x) { return ::__tg_asinh(x); }
+
+/*! @abstract Generalizes the function atanh to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN atanh(fptypeN x) { return ::__tg_atanh(x); }
+
+/*! @abstract Generalizes the function cosh to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN cosh(fptypeN x) { return ::__tg_cosh(x); }
+
+/*! @abstract Generalizes the function sinh to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN sinh(fptypeN x) { return ::__tg_sinh(x); }
+
+/*! @abstract Generalizes the function tanh to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN tanh(fptypeN x) { return ::__tg_tanh(x); }
+
+/*! @abstract Generalizes the function exp to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN exp(fptypeN x) { return ::__tg_exp(x); }
+
+/*! @abstract Generalizes the function exp2 to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN exp2(fptypeN x) { return ::__tg_exp2(x); }
+
+#if SIMD_LIBRARY_VERSION >= 1
+/*! @abstract Generalizes the function exp10 to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN exp10(fptypeN x) { return ::__tg_exp10(x); }
+#endif
+
+/*! @abstract Generalizes the function expm1 to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN expm1(fptypeN x) { return ::__tg_expm1(x); }
+
+/*! @abstract Generalizes the function log to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN log(fptypeN x) { return ::__tg_log(x); }
+
+/*! @abstract Generalizes the function log2 to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN log2(fptypeN x) { return ::__tg_log2(x); }
+
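The expm1 and log1p wrappers exist for the usual numerical reason, which the header does not spell out: for |x| far below 1, forming 1 + x first rounds away most of x, so log(1 + x) loses precision that log1p(x) keeps. A self-contained scalar sketch:

    #include <math.h>
    #include <stdio.h>

    int main(void) {
        double x = 1e-12;
        printf("%.17g\n", log(1.0 + x)); /* contaminated by rounding in 1 + x */
        printf("%.17g\n", log1p(x));     /* accurate: essentially x - x*x/2 */
        return 0;
    }

+/*! @abstract Generalizes the function log10 to operate on vectors
+ * of floats and doubles.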
*/
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN log10(fptypeN x) { return ::__tg_log10(x); }
+
+/*! @abstract Generalizes the function log1p to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN log1p(fptypeN x) { return ::__tg_log1p(x); }
+
+/*! @abstract Generalizes the function fabs to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN fabs(fptypeN x) { return ::__tg_fabs(x); }
+
+/*! @abstract Generalizes the function cbrt to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN cbrt(fptypeN x) { return ::__tg_cbrt(x); }
+
+/*! @abstract Generalizes the function sqrt to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN sqrt(fptypeN x) { return ::__tg_sqrt(x); }
+
+/*! @abstract Generalizes the function erf to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN erf(fptypeN x) { return ::__tg_erf(x); }
+
+/*! @abstract Generalizes the function erfc to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN erfc(fptypeN x) { return ::__tg_erfc(x); }
+
+/*! @abstract Generalizes the function tgamma to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN tgamma(fptypeN x) { return ::__tg_tgamma(x); }
+
+/*! @abstract Generalizes the function lgamma to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN lgamma(fptypeN x) { return ::__tg_lgamma(x); }
+
+/*! @abstract Generalizes the function ceil to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN ceil(fptypeN x) { return ::__tg_ceil(x); }
+
+/*! @abstract Generalizes the function floor to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN floor(fptypeN x) { return ::__tg_floor(x); }
+
+/*! @abstract Generalizes the function rint to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN rint(fptypeN x) { return ::__tg_rint(x); }
+
+/*! @abstract Generalizes the function round to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN round(fptypeN x) { return ::__tg_round(x); }
+
+/*! @abstract Generalizes the function trunc to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN trunc(fptypeN x) { return ::__tg_trunc(x); }
+
+#if SIMD_LIBRARY_VERSION >= 5
+/*! @abstract Computes sincos more efficiently than separate computations. */
+ template <typename fptypeN>
+ static SIMD_INLINE SIMD_NODEBUG void sincos(fptypeN x, fptypeN *sinp, fptypeN *cosp) { ::__tg_sincos(x, sinp, cosp); }
+
+/*! @abstract Computes sincospi more efficiently than separate computations. */
+ template <typename fptypeN>
+ static SIMD_INLINE SIMD_NODEBUG void sincospi(fptypeN x, fptypeN *sinp, fptypeN *cosp) { ::__tg_sincospi(x, sinp, cosp); }
+
+#endif
+/*! @abstract Generalizes the function isfinite to operate on
+ * vectors of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC
+ typename std::enable_if<std::is_floating_point<typename traits<fptypeN>::scalar_t>::value, typename traits<fptypeN>::mask_t>::type
+ isfinite(fptypeN x) { return ::__tg_isfinite(x); }
+
+/*! @abstract Generalizes the function isinf to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC
+ typename std::enable_if<std::is_floating_point<typename traits<fptypeN>::scalar_t>::value, typename traits<fptypeN>::mask_t>::type
+ isinf(fptypeN x) { return ::__tg_isinf(x); }
+
+/*!
@abstract Generalizes the function isnan to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC
+ typename std::enable_if<std::is_floating_point<typename traits<fptypeN>::scalar_t>::value, typename traits<fptypeN>::mask_t>::type
+ isnan(fptypeN x) { return ::__tg_isnan(x); }
+
+/*! @abstract Generalizes the function isnormal to operate on
+ * vectors of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC
+ typename std::enable_if<std::is_floating_point<typename traits<fptypeN>::scalar_t>::value, typename traits<fptypeN>::mask_t>::type
+ isnormal(fptypeN x) { return ::__tg_isnormal(x); }
+
+/*! @abstract Generalizes the function atan2 to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN atan2(fptypeN y, fptypeN x) { return ::__tg_atan2(y, x); }
+
+/*! @abstract Generalizes the function hypot to operate on vectors
+ * of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN hypot(fptypeN x, fptypeN y) { return ::__tg_hypot(x, y); }
+
+/*! @abstract Generalizes the function pow to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN pow(fptypeN x, fptypeN y) { return ::__tg_pow(x, y); }
+
+/*! @abstract Generalizes the function fmod to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN fmod(fptypeN x, fptypeN y) { return ::__tg_fmod(x, y); }
+
+/*! @abstract Generalizes the function remainder to operate on
+ * vectors of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN remainder(fptypeN x, fptypeN y) { return ::__tg_remainder(x, y); }
+
+/*! @abstract Generalizes the function copysign to operate on
+ * vectors of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN copysign(fptypeN x, fptypeN y) { return ::__tg_copysign(x, y); }
+
+/*! @abstract Generalizes the function nextafter to operate on
+ * vectors of floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN nextafter(fptypeN x, fptypeN y) { return ::__tg_nextafter(x, y); }
+
+/*! @abstract Generalizes the function fdim to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN fdim(fptypeN x, fptypeN y) { return ::__tg_fdim(x, y); }
+
+/*! @abstract Generalizes the function fmax to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN fmax(fptypeN x, fptypeN y) { return ::__tg_fmax(x, y); }
+
+/*! @abstract Generalizes the function fmin to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN fmin(fptypeN x, fptypeN y) { return ::__tg_fmin(x, y); }
+
+/*! @abstract Generalizes the function fma to operate on vectors of
+ * floats and doubles. */
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN fma(fptypeN x, fptypeN y, fptypeN z) { return ::__tg_fma(x, y, z); }
+
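Unlike their scalar counterparts, the four classification functions above return a lane mask (an integer vector with -1 in lanes where the predicate holds), and the C macros later in this header return the same masks; either form composes directly with simd_bitselect. A sketch that scrubs NaN lanes before a reduction, written against the C macro path and assuming <simd/simd.h> (the helper name is invented for illustration):

    #include <simd/simd.h>

    static simd_float4 zero_nan_lanes(simd_float4 v) {
        simd_int4 bad = isnan(v);                         /* -1 where v is NaN, 0 elsewhere */
        return simd_bitselect(v, (simd_float4)0.0f, bad); /* take 0 bits where mask is set */
    }

+/*! @abstract Computes x*y + z by the most efficient means available; either
+ * a fused multiply add or separate multiply and add.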
*/
+ template <typename fptypeN>
+ static SIMD_CPPFUNC fptypeN muladd(fptypeN x, fptypeN y, fptypeN z) { return ::simd_muladd(x, y, z); }
+};
+
+extern "C" {
+#else
+#include <tgmath.h>
+/* In C and Objective-C, we need some infrastructure to piggyback on tgmath.h */
+static SIMD_OVERLOAD simd_float2 __tg_promote(simd_float2);
+static SIMD_OVERLOAD simd_float3 __tg_promote(simd_float3);
+static SIMD_OVERLOAD simd_float4 __tg_promote(simd_float4);
+static SIMD_OVERLOAD simd_float8 __tg_promote(simd_float8);
+static SIMD_OVERLOAD simd_float16 __tg_promote(simd_float16);
+static SIMD_OVERLOAD simd_double2 __tg_promote(simd_double2);
+static SIMD_OVERLOAD simd_double3 __tg_promote(simd_double3);
+static SIMD_OVERLOAD simd_double4 __tg_promote(simd_double4);
+static SIMD_OVERLOAD simd_double8 __tg_promote(simd_double8);
+
+/* Apple extensions to <math.h>, added in macOS 10.9 and iOS 7.0 */
+#if __MAC_OS_X_VERSION_MIN_REQUIRED >= __MAC_10_9 || \
+ __IPHONE_OS_VERSION_MIN_REQUIRED >= __IPHONE_7_0 || \
+ __DRIVERKIT_VERSION_MIN_REQUIRED >= __DRIVERKIT_19_0
+static inline SIMD_CFUNC float __tg_cospi(float x) { return __cospif(x); }
+static inline SIMD_CFUNC double __tg_cospi(double x) { return __cospi(x); }
+#undef cospi
+/*! @abstract `cospi(x)` computes `cos(pi * x)` without intermediate rounding.
+ *
+ * @discussion Both faster and more accurate than multiplying by `pi` and then
+ * calling `cos`. Defined for `float` and `double` as well as vectors of
+ * floats and doubles as provided by `<simd/simd.h>`. */
+#define cospi(__x) __tg_cospi(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC float __tg_sinpi(float x) { return __sinpif(x); }
+static inline SIMD_CFUNC double __tg_sinpi(double x) { return __sinpi(x); }
+#undef sinpi
+/*! @abstract `sinpi(x)` computes `sin(pi * x)` without intermediate rounding.
+ *
+ * @discussion Both faster and more accurate than multiplying by `pi` and then
+ * calling `sin`. Defined for `float` and `double` as well as vectors
+ * of floats and doubles as provided by `<simd/simd.h>`. */
+#define sinpi(__x) __tg_sinpi(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC float __tg_tanpi(float x) { return __tanpif(x); }
+static inline SIMD_CFUNC double __tg_tanpi(double x) { return __tanpi(x); }
+#undef tanpi
+/*! @abstract `tanpi(x)` computes `tan(pi * x)` without intermediate rounding.
+ *
+ * @discussion Both faster and more accurate than multiplying by `pi` and then
+ * calling `tan`. Defined for `float` and `double` as well as vectors of
+ * floats and doubles as provided by `<simd/simd.h>`. */
+#define tanpi(__x) __tg_tanpi(__tg_promote1((__x))(__x))
+
+#if SIMD_LIBRARY_VERSION >= 5
+static inline SIMD_NONCONST void __tg_sincos(float x, float *sinp, float *cosp) { __sincosf(x, sinp, cosp); }
+static inline SIMD_NONCONST void __tg_sincos(double x, double *sinp, double *cosp) { __sincos(x, sinp, cosp); }
+#undef sincos
+/*! @abstract `sincos(x)` computes `sin(x)` and `cos(x)` more efficiently.
+ *
+ * @discussion Defined for `float` and `double` as well as vectors of
+ * floats and doubles as provided by `<simd/simd.h>`. */
+#define sincos(__x, __sinp, __cosp) __tg_sincos(__tg_promote1((__x))(__x), __sinp, __cosp)
+
+static inline SIMD_NONCONST void __tg_sincospi(float x, float *sinp, float *cosp) { __sincospif(x, sinp, cosp); }
+static inline SIMD_NONCONST void __tg_sincospi(double x, double *sinp, double *cosp) { __sincospi(x, sinp, cosp); }
+#undef sincospi
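The pi-scaled functions above skip the intermediate product pi * x, so arguments that are exact in the pi-scaled domain stay exact: cospi(0.5) is exactly zero, while cos(M_PI * 0.5) can only return the cosine of the nearest double to pi/2. A scalar sketch (illustrative only):

    #include <simd/simd.h>
    #include <math.h>
    #include <stdio.h>

    int main(void) {
        printf("%.17g\n", cospi(0.5));      /* exactly 0 */
        printf("%.17g\n", cos(M_PI * 0.5)); /* about 6.1e-17, not 0 */
        return 0;
    }

+/*! @abstract `sincospi(x)` computes `sin(pi * x)` and `cos(pi * x)` more efficiently.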
+ *
+ * @discussion Defined for `float` and `double` as well as vectors of
+ * floats and doubles as provided by `<simd/simd.h>`. */
+#define sincospi(__x, __sinp, __cosp) __tg_sincospi(__tg_promote1((__x))(__x), __sinp, __cosp)
+#endif // SIMD_LIBRARY_VERSION >= 5
+
+static inline SIMD_CFUNC float __tg_exp10(float x) { return __exp10f(x); }
+static inline SIMD_CFUNC double __tg_exp10(double x) { return __exp10(x); }
+#undef exp10
+/*! @abstract `exp10(x)` computes `10**x` more efficiently and accurately
+ * than `pow(10, x)`.
+ *
+ * @discussion Defined for `float` and `double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define exp10(__x) __tg_exp10(__tg_promote1((__x))(__x))
+#endif
+
+#if (defined(__GNUC__) && 0 == __FINITE_MATH_ONLY__)
+static inline SIMD_CFUNC int __tg_isfinite(float x) { return __inline_isfinitef(x); }
+static inline SIMD_CFUNC int __tg_isfinite(double x) { return __inline_isfinited(x); }
+static inline SIMD_CFUNC int __tg_isfinite(long double x) { return __inline_isfinitel(x); }
+#undef isfinite
+/*! @abstract `__tg_isfinite(x)` determines if x is a finite value.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isfinite(__x) __tg_isfinite(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC int __tg_isinf(float x) { return __inline_isinff(x); }
+static inline SIMD_CFUNC int __tg_isinf(double x) { return __inline_isinfd(x); }
+static inline SIMD_CFUNC int __tg_isinf(long double x) { return __inline_isinfl(x); }
+#undef isinf
+/*! @abstract `__tg_isinf(x)` determines if x is positive or negative infinity.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isinf(__x) __tg_isinf(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC int __tg_isnan(float x) { return __inline_isnanf(x); }
+static inline SIMD_CFUNC int __tg_isnan(double x) { return __inline_isnand(x); }
+static inline SIMD_CFUNC int __tg_isnan(long double x) { return __inline_isnanl(x); }
+#undef isnan
+/*! @abstract `__tg_isnan(x)` determines if x is a not-a-number (NaN) value.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isnan(__x) __tg_isnan(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC int __tg_isnormal(float x) { return __inline_isnormalf(x); }
+static inline SIMD_CFUNC int __tg_isnormal(double x) { return __inline_isnormald(x); }
+static inline SIMD_CFUNC int __tg_isnormal(long double x) { return __inline_isnormall(x); }
+#undef isnormal
+/*! @abstract `__tg_isnormal(x)` determines if x is a normal value.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isnormal(__x) __tg_isnormal(__tg_promote1((__x))(__x))
+
+#else /* defined(__GNUC__) && 0 == __FINITE_MATH_ONLY__ */
+
+static inline SIMD_CFUNC int __tg_isfinite(float x) { return __isfinitef(x); }
+static inline SIMD_CFUNC int __tg_isfinite(double x) { return __isfinited(x); }
+static inline SIMD_CFUNC int __tg_isfinite(long double x) { return __isfinitel(x); }
+#undef isfinite
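exp10 follows the same pattern as the pi-scaled functions: a dedicated kernel beats pow(10, x) in both speed and accuracy, and the macro above makes the call type-generic in C. A sketch, assuming a SIMD_LIBRARY_VERSION that ships the vector kernels (the function name is invented for illustration):

    #include <simd/simd.h>

    void decades(void) {
        simd_float4 e = simd_make_float4(0.0f, 1.0f, 2.0f, 3.0f);
        simd_float4 p = exp10(e); /* {1, 10, 100, 1000}, per lane */
        (void)p;
    }

+/*! @abstract `__tg_isfinite(x)` determines if x is a finite value.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`.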
*/
+#define isfinite(__x) __tg_isfinite(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC int __tg_isinf(float x) { return __isinff(x); }
+static inline SIMD_CFUNC int __tg_isinf(double x) { return __isinfd(x); }
+static inline SIMD_CFUNC int __tg_isinf(long double x) { return __isinfl(x); }
+#undef isinf
+/*! @abstract `__tg_isinf(x)` determines if x is positive or negative infinity.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isinf(__x) __tg_isinf(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC int __tg_isnan(float x) { return __isnanf(x); }
+static inline SIMD_CFUNC int __tg_isnan(double x) { return __isnand(x); }
+static inline SIMD_CFUNC int __tg_isnan(long double x) { return __isnanl(x); }
+#undef isnan
+/*! @abstract `__tg_isnan(x)` determines if x is a not-a-number (NaN) value.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isnan(__x) __tg_isnan(__tg_promote1((__x))(__x))
+
+static inline SIMD_CFUNC int __tg_isnormal(float x) { return __isnormalf(x); }
+static inline SIMD_CFUNC int __tg_isnormal(double x) { return __isnormald(x); }
+static inline SIMD_CFUNC int __tg_isnormal(long double x) { return __isnormall(x); }
+#undef isnormal
+/*! @abstract `__tg_isnormal(x)` determines if x is a normal value.
+ *
+ * @discussion Defined for `float`, `double` and `long double` as well as vectors of floats
+ * and doubles as provided by `<simd/simd.h>`. */
+#define isnormal(__x) __tg_isnormal(__tg_promote1((__x))(__x))
+#endif /* defined(__GNUC__) && 0 == __FINITE_MATH_ONLY__ */
+#endif /* !__cplusplus */
+
+#pragma mark - fabs implementation
+static inline SIMD_CFUNC simd_float2 __tg_fabs(simd_float2 x) { return simd_bitselect(0.0, x, 0x7fffffff); }
+static inline SIMD_CFUNC simd_float3 __tg_fabs(simd_float3 x) { return simd_bitselect(0.0, x, 0x7fffffff); }
+static inline SIMD_CFUNC simd_float4 __tg_fabs(simd_float4 x) { return simd_bitselect(0.0, x, 0x7fffffff); }
+static inline SIMD_CFUNC simd_float8 __tg_fabs(simd_float8 x) { return simd_bitselect(0.0, x, 0x7fffffff); }
+static inline SIMD_CFUNC simd_float16 __tg_fabs(simd_float16 x) { return simd_bitselect(0.0, x, 0x7fffffff); }
+static inline SIMD_CFUNC simd_double2 __tg_fabs(simd_double2 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffff); }
+static inline SIMD_CFUNC simd_double3 __tg_fabs(simd_double3 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffff); }
+static inline SIMD_CFUNC simd_double4 __tg_fabs(simd_double4 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffff); }
+static inline SIMD_CFUNC simd_double8 __tg_fabs(simd_double8 x) { return simd_bitselect(0.0, x, 0x7fffffffffffffff); }
+
+#pragma mark - isfinite implementation
+static inline SIMD_CFUNC simd_int2 __tg_isfinite(simd_float2 x) { return x == x && __tg_fabs(x) != (simd_float2)INFINITY; }
+static inline SIMD_CFUNC simd_int3 __tg_isfinite(simd_float3 x) { return x == x && __tg_fabs(x) != (simd_float3)INFINITY; }
+static inline SIMD_CFUNC simd_int4 __tg_isfinite(simd_float4 x) { return x == x && __tg_fabs(x) != (simd_float4)INFINITY; }
+static inline SIMD_CFUNC simd_int8 __tg_isfinite(simd_float8 x) { return x == x && __tg_fabs(x) != (simd_float8)INFINITY; }
+static inline SIMD_CFUNC simd_int16 __tg_isfinite(simd_float16 x) { return x == x && __tg_fabs(x) != (simd_float16)INFINITY; }
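The fabs lanes above are pure bit manipulation: simd_bitselect(0.0, x, 0x7fffffff) takes every bit of x except the sign bit, which comes from the 0.0 operand, and the copysign implementation further down uses the complementary mask. The same trick on one scalar lane, as a self-contained sketch (the helper name is invented for illustration):

    #include <stdint.h>
    #include <string.h>

    static float fabs_by_bits(float x) {
        uint32_t u;
        memcpy(&u, &x, sizeof u); /* reinterpret the float's bits */
        u &= 0x7fffffffu;         /* clear the sign bit, keep exponent and mantissa */
        memcpy(&x, &u, sizeof x);
        return x;
    }

+static inline SIMD_CFUNC simd_long2 __tg_isfinite(simd_double2 x) { return x == x &&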
__tg_fabs(x) != (simd_double2)INFINITY; } +static inline SIMD_CFUNC simd_long3 __tg_isfinite(simd_double3 x) { return x == x && __tg_fabs(x) != (simd_double3)INFINITY; } +static inline SIMD_CFUNC simd_long4 __tg_isfinite(simd_double4 x) { return x == x && __tg_fabs(x) != (simd_double4)INFINITY; } +static inline SIMD_CFUNC simd_long8 __tg_isfinite(simd_double8 x) { return x == x && __tg_fabs(x) != (simd_double8)INFINITY; } + +#pragma mark - isinf implementation +static inline SIMD_CFUNC simd_int2 __tg_isinf(simd_float2 x) { return __tg_fabs(x) == (simd_float2)INFINITY; } +static inline SIMD_CFUNC simd_int3 __tg_isinf(simd_float3 x) { return __tg_fabs(x) == (simd_float3)INFINITY; } +static inline SIMD_CFUNC simd_int4 __tg_isinf(simd_float4 x) { return __tg_fabs(x) == (simd_float4)INFINITY; } +static inline SIMD_CFUNC simd_int8 __tg_isinf(simd_float8 x) { return __tg_fabs(x) == (simd_float8)INFINITY; } +static inline SIMD_CFUNC simd_int16 __tg_isinf(simd_float16 x) { return __tg_fabs(x) == (simd_float16)INFINITY; } +static inline SIMD_CFUNC simd_long2 __tg_isinf(simd_double2 x) { return __tg_fabs(x) == (simd_double2)INFINITY; } +static inline SIMD_CFUNC simd_long3 __tg_isinf(simd_double3 x) { return __tg_fabs(x) == (simd_double3)INFINITY; } +static inline SIMD_CFUNC simd_long4 __tg_isinf(simd_double4 x) { return __tg_fabs(x) == (simd_double4)INFINITY; } +static inline SIMD_CFUNC simd_long8 __tg_isinf(simd_double8 x) { return __tg_fabs(x) == (simd_double8)INFINITY; } + +#pragma mark - isnan implementation +static inline SIMD_CFUNC simd_int2 __tg_isnan(simd_float2 x) { return x != x; } +static inline SIMD_CFUNC simd_int3 __tg_isnan(simd_float3 x) { return x != x; } +static inline SIMD_CFUNC simd_int4 __tg_isnan(simd_float4 x) { return x != x; } +static inline SIMD_CFUNC simd_int8 __tg_isnan(simd_float8 x) { return x != x; } +static inline SIMD_CFUNC simd_int16 __tg_isnan(simd_float16 x) { return x != x; } +static inline SIMD_CFUNC simd_long2 __tg_isnan(simd_double2 x) { return x != x; } +static inline SIMD_CFUNC simd_long3 __tg_isnan(simd_double3 x) { return x != x; } +static inline SIMD_CFUNC simd_long4 __tg_isnan(simd_double4 x) { return x != x; } +static inline SIMD_CFUNC simd_long8 __tg_isnan(simd_double8 x) { return x != x; } + +#pragma mark - isnormal implementation +static inline SIMD_CFUNC simd_int2 __tg_isnormal(simd_float2 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_float2)__FLT_MIN__; } +static inline SIMD_CFUNC simd_int3 __tg_isnormal(simd_float3 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_float3)__FLT_MIN__; } +static inline SIMD_CFUNC simd_int4 __tg_isnormal(simd_float4 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_float4)__FLT_MIN__; } +static inline SIMD_CFUNC simd_int8 __tg_isnormal(simd_float8 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_float8)__FLT_MIN__; } +static inline SIMD_CFUNC simd_int16 __tg_isnormal(simd_float16 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_float16)__FLT_MIN__; } +static inline SIMD_CFUNC simd_long2 __tg_isnormal(simd_double2 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_double2)__DBL_MIN__; } +static inline SIMD_CFUNC simd_long3 __tg_isnormal(simd_double3 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_double3)__DBL_MIN__; } +static inline SIMD_CFUNC simd_long4 __tg_isnormal(simd_double4 x) { return __tg_isfinite(x) && __tg_fabs(x) >= (simd_double4)__DBL_MIN__; } +static inline SIMD_CFUNC simd_long8 __tg_isnormal(simd_double8 x) { return __tg_isfinite(x) && __tg_fabs(x) >= 
(simd_double8)__DBL_MIN__; } + +#pragma mark - fmin, fmax implementation +static SIMD_CFUNC simd_float2 __tg_fmin(simd_float2 x, simd_float2 y) { +#if defined __SSE2__ + return simd_make_float2(__tg_fmin(simd_make_float4_undef(x), simd_make_float4_undef(y))); +#elif defined __arm64__ + return vminnm_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vmin_f32(x, y); +#else + return simd_bitselect(y, x, (x <= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_fmin(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_fmin(simd_make_float4_undef(x), simd_make_float4_undef(y))); +} + +static SIMD_CFUNC simd_float4 __tg_fmin(simd_float4 x, simd_float4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm_range_ps(x, y, 4); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_min_ps(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_min_ps(x, y), x, y != y); +#elif defined __arm64__ + return vminnmq_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vminq_f32(x, y); +#else + return simd_bitselect(y, x, (x <= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_fmin(simd_float8 x, simd_float8 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm256_range_ps(x, y, 4); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_min_ps(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_min_ps(x, y), x, y != y); +#else + return simd_make_float8(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_fmin(simd_float16 x, simd_float16 y) { +#if defined __x86_64__ && defined __AVX512DQ__ && !__FINITE_MATH_ONLY__ + return _mm512_range_ps(x, y, 4); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_min_ps(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_min_ps(x, y), x, y != y); +#else + return simd_make_float16(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double2 __tg_fmin(simd_double2 x, simd_double2 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm_range_pd(x, y, 4); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_min_pd(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_min_pd(x, y), x, y != y); +#elif defined __arm64__ + return vminnmq_f64(x, y); +#else + return simd_bitselect(y, x, (x <= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_fmin(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_fmin(simd_make_double4_undef(x), simd_make_double4_undef(y))); +} + +static SIMD_CFUNC simd_double4 __tg_fmin(simd_double4 x, simd_double4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm256_range_pd(x, y, 4); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_min_pd(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_min_pd(x, y), x, y != y); +#else + return simd_make_double4(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_fmin(simd_double8 x, simd_double8 y) { +#if defined __x86_64__ && defined __AVX512DQ__ + return _mm512_range_pd(x, y, 4); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_min_pd(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_min_pd(x, y), x, y != y); +#else + return simd_make_double8(__tg_fmin(x.lo, y.lo), __tg_fmin(x.hi, y.hi)); 
+#endif +} + +static SIMD_CFUNC simd_float2 __tg_fmax(simd_float2 x, simd_float2 y) { +#if defined __SSE2__ + return simd_make_float2(__tg_fmax(simd_make_float4_undef(x), simd_make_float4_undef(y))); +#elif defined __arm64__ + return vmaxnm_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vmax_f32(x, y); +#else + return simd_bitselect(y, x, (x >= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_fmax(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_fmax(simd_make_float4_undef(x), simd_make_float4_undef(y))); +} + +static SIMD_CFUNC simd_float4 __tg_fmax(simd_float4 x, simd_float4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm_range_ps(x, y, 5); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_max_ps(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_max_ps(x, y), x, y != y); +#elif defined __arm64__ + return vmaxnmq_f32(x, y); +#elif defined __arm__ && __FINITE_MATH_ONLY__ + return vmaxq_f32(x, y); +#else + return simd_bitselect(y, x, (x >= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_fmax(simd_float8 x, simd_float8 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ && !__FINITE_MATH_ONLY__ + return _mm256_range_ps(x, y, 5); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_max_ps(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_max_ps(x, y), x, y != y); +#else + return simd_make_float8(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_fmax(simd_float16 x, simd_float16 y) { +#if defined __x86_64__ && defined __AVX512DQ__ && !__FINITE_MATH_ONLY__ + return _mm512_range_ps(x, y, 5); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_max_ps(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_max_ps(x, y), x, y != y); +#else + return simd_make_float16(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double2 __tg_fmax(simd_double2 x, simd_double2 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm_range_pd(x, y, 5); +#elif defined __SSE2__ && __FINITE_MATH_ONLY__ + return _mm_max_pd(x, y); +#elif defined __SSE2__ + return simd_bitselect(_mm_max_pd(x, y), x, y != y); +#elif defined __arm64__ + return vmaxnmq_f64(x, y); +#else + return simd_bitselect(y, x, (x >= y) | (y != y)); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_fmax(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_fmax(simd_make_double4_undef(x), simd_make_double4_undef(y))); +} + +static SIMD_CFUNC simd_double4 __tg_fmax(simd_double4 x, simd_double4 y) { +#if defined __AVX512DQ__ && defined __AVX512VL__ + return _mm256_range_pd(x, y, 5); +#elif defined __AVX__ && __FINITE_MATH_ONLY__ + return _mm256_max_pd(x, y); +#elif defined __AVX__ + return simd_bitselect(_mm256_max_pd(x, y), x, y != y); +#else + return simd_make_double4(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_fmax(simd_double8 x, simd_double8 y) { +#if defined __x86_64__ && defined __AVX512DQ__ + return _mm512_range_pd(x, y, 5); +#elif defined __x86_64__ && defined __AVX512F__ && __FINITE_MATH_ONLY__ + return _mm512_max_pd(x, y); +#elif defined __x86_64__ && defined __AVX512F__ + return simd_bitselect(_mm512_max_pd(x, y), x, y != y); +#else + return simd_make_double8(__tg_fmax(x.lo, y.lo), __tg_fmax(x.hi, y.hi)); +#endif +} + +#pragma mark - copysign implementation +static 
inline SIMD_CFUNC simd_float2 __tg_copysign(simd_float2 x, simd_float2 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float3 __tg_copysign(simd_float3 x, simd_float3 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float4 __tg_copysign(simd_float4 x, simd_float4 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float8 __tg_copysign(simd_float8 x, simd_float8 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_float16 __tg_copysign(simd_float16 x, simd_float16 y) { return simd_bitselect(y, x, 0x7fffffff); } +static inline SIMD_CFUNC simd_double2 __tg_copysign(simd_double2 x, simd_double2 y) { return simd_bitselect(y, x, 0x7fffffffffffffff); } +static inline SIMD_CFUNC simd_double3 __tg_copysign(simd_double3 x, simd_double3 y) { return simd_bitselect(y, x, 0x7fffffffffffffff); } +static inline SIMD_CFUNC simd_double4 __tg_copysign(simd_double4 x, simd_double4 y) { return simd_bitselect(y, x, 0x7fffffffffffffff); } +static inline SIMD_CFUNC simd_double8 __tg_copysign(simd_double8 x, simd_double8 y) { return simd_bitselect(y, x, 0x7fffffffffffffff); } + +#pragma mark - sqrt implementation +static SIMD_CFUNC simd_float2 __tg_sqrt(simd_float2 x) { +#if defined __SSE2__ + return simd_make_float2(__tg_sqrt(simd_make_float4_undef(x))); +#elif defined __arm64__ + return vsqrt_f32(x); +#else + return simd_make_float2(sqrt(x.x), sqrt(x.y)); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_sqrt(simd_float3 x) { + return simd_make_float3(__tg_sqrt(simd_make_float4_undef(x))); +} + +static SIMD_CFUNC simd_float4 __tg_sqrt(simd_float4 x) { +#if defined __SSE2__ + return _mm_sqrt_ps(x); +#elif defined __arm64__ + return vsqrtq_f32(x); +#else + return simd_make_float4(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_sqrt(simd_float8 x) { +#if defined __AVX__ + return _mm256_sqrt_ps(x); +#else + return simd_make_float8(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_sqrt(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_sqrt_ps(x); +#else + return simd_make_float16(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double2 __tg_sqrt(simd_double2 x) { +#if defined __SSE2__ + return _mm_sqrt_pd(x); +#elif defined __arm64__ + return vsqrtq_f64(x); +#else + return simd_make_double2(sqrt(x.x), sqrt(x.y)); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_sqrt(simd_double3 x) { + return simd_make_double3(__tg_sqrt(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_sqrt(simd_double4 x) { +#if defined __AVX__ + return _mm256_sqrt_pd(x); +#else + return simd_make_double4(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_sqrt(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_sqrt_pd(x); +#else + return simd_make_double8(__tg_sqrt(x.lo), __tg_sqrt(x.hi)); +#endif +} + +#pragma mark - ceil, floor, rint, trunc implementation +static SIMD_CFUNC simd_float2 __tg_ceil(simd_float2 x) { +#if defined __arm64__ + return vrndp_f32(x); +#else + return simd_make_float2(__tg_ceil(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_ceil(simd_float3 x) { + return simd_make_float3(__tg_ceil(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_ceil_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_ceil(simd_float4 x) 
{ +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndpq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_ceil_f4(x); +#else + simd_float4 truncated = __tg_trunc(x); + simd_float4 adjust = simd_bitselect((simd_float4)0, 1, truncated < x); + return __tg_copysign(truncated + adjust, x); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_ceil(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float8(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_ceil(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float16(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_ceil_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_ceil(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndpq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_ceil_d2(x); +#else + simd_double2 truncated = __tg_trunc(x); + simd_double2 adjust = simd_bitselect((simd_double2)0, 1, truncated < x); + return __tg_copysign(truncated + adjust, x); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_ceil(simd_double3 x) { + return simd_make_double3(__tg_ceil(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_ceil(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double4(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_ceil(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double8(__tg_ceil(x.lo), __tg_ceil(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_floor(simd_float2 x) { +#if defined __arm64__ + return vrndm_f32(x); +#else + return simd_make_float2(__tg_floor(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_floor(simd_float3 x) { + return simd_make_float3(__tg_floor(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_floor_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_floor(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndmq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_floor_f4(x); +#else + simd_float4 truncated = __tg_trunc(x); + simd_float4 adjust = simd_bitselect((simd_float4)0, 1, truncated > x); + return truncated - adjust; +#endif +} + +static SIMD_CFUNC simd_float8 __tg_floor(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float8(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_floor(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_float16(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern 
simd_double2 _simd_floor_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_floor(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndmq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_floor_d2(x); +#else + simd_double2 truncated = __tg_trunc(x); + simd_double2 adjust = simd_bitselect((simd_double2)0, 1, truncated > x); + return truncated - adjust; +#endif +} + +static SIMD_CFUNC simd_double3 __tg_floor(simd_double3 x) { + return simd_make_double3(__tg_floor(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_floor(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double4(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_floor(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC); +#else + return simd_make_double8(__tg_floor(x.lo), __tg_floor(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_rint(simd_float2 x) { +#if defined __arm64__ + return vrndx_f32(x); +#else + return simd_make_float2(__tg_rint(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_rint(simd_float3 x) { + return simd_make_float3(__tg_rint(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_rint_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_rint(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_RINT); +#elif defined __arm64__ + return vrndxq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_rint_f4(x); +#else + simd_float4 magic = __tg_copysign(0x1.0p23, x); + simd_int4 x_is_small = __tg_fabs(x) < 0x1.0p23; + return simd_bitselect(x, (x + magic) - magic, x_is_small & 0x7fffffff); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_rint(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_RINT); +#else + return simd_make_float8(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_rint(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_RINT); +#else + return simd_make_float16(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_rint_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_rint(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_RINT); +#elif defined __arm64__ + return vrndxq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_rint_d2(x); +#else + simd_double2 magic = __tg_copysign(0x1.0p52, x); + simd_long2 x_is_small = __tg_fabs(x) < 0x1.0p52; + return simd_bitselect(x, (x + magic) - magic, x_is_small & 0x7fffffffffffffff); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_rint(simd_double3 x) { + return simd_make_double3(__tg_rint(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_rint(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_RINT); +#else + return simd_make_double4(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_rint(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_RINT); +#else + return 
simd_make_double8(__tg_rint(x.lo), __tg_rint(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float2 __tg_trunc(simd_float2 x) { +#if defined __arm64__ + return vrnd_f32(x); +#else + return simd_make_float2(__tg_trunc(simd_make_float4_undef(x))); +#endif +} + +static SIMD_CFUNC simd_float3 __tg_trunc(simd_float3 x) { + return simd_make_float3(__tg_trunc(simd_make_float4_undef(x))); +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_trunc_f4(simd_float4 x); +#endif + +static SIMD_CFUNC simd_float4 __tg_trunc(simd_float4 x) { +#if defined __SSE4_1__ + return _mm_round_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndq_f32(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_trunc_f4(x); +#else + simd_float4 binade = simd_bitselect(0, x, 0x7f800000); + simd_int4 mask = (simd_int4)__tg_fmin(-2*binade + 1, -0); + simd_float4 result = simd_bitselect(0, x, mask); + return simd_bitselect(x, result, binade < 0x1.0p23); +#endif +} + +static SIMD_CFUNC simd_float8 __tg_trunc(simd_float8 x) { +#if defined __AVX__ + return _mm256_round_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_float8(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +static SIMD_CFUNC simd_float16 __tg_trunc(simd_float16 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_ps(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_float16(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +#if defined __arm__ && SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_trunc_d2(simd_double2 x); +#endif + +static SIMD_CFUNC simd_double2 __tg_trunc(simd_double2 x) { +#if defined __SSE4_1__ + return _mm_round_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#elif defined __arm64__ + return vrndq_f64(x); +#elif defined __arm__ && SIMD_LIBRARY_VERSION >= 3 + return _simd_trunc_d2(x); +#else + simd_double2 binade = simd_bitselect(0, x, 0x7ff0000000000000); + simd_long2 mask = (simd_long2)__tg_fmin(-2*binade + 1, -0); + simd_double2 result = simd_bitselect(0, x, mask); + return simd_bitselect(x, result, binade < 0x1.0p52); +#endif +} + +static SIMD_CFUNC simd_double3 __tg_trunc(simd_double3 x) { + return simd_make_double3(__tg_trunc(simd_make_double4_undef(x))); +} + +static SIMD_CFUNC simd_double4 __tg_trunc(simd_double4 x) { +#if defined __AVX__ + return _mm256_round_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_double4(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +static SIMD_CFUNC simd_double8 __tg_trunc(simd_double8 x) { +#if defined __x86_64__ && defined __AVX512F__ + return _mm512_roundscale_pd(x, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC); +#else + return simd_make_double8(__tg_trunc(x.lo), __tg_trunc(x.hi)); +#endif +} + +#pragma mark - sine, cosine implementation +static inline SIMD_CFUNC simd_float2 __tg_sin(simd_float2 x) { + return simd_make_float2(__tg_sin(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_sin(simd_float3 x) { + return simd_make_float3(__tg_sin(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_sin_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x) { + return _simd_sin_f4(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_float4 __sin_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x) { + return __sin_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_sin(simd_float4 x) { + return simd_make_float4(sin(x.x), sin(x.y), 
sin(x.z), sin(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_sin_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_sin(simd_float8 x) { + return _simd_sin_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_sin(simd_float8 x) { + return simd_make_float8(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_sin_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_sin(simd_float16 x) { + return _simd_sin_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_sin(simd_float16 x) { + return simd_make_float16(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_sin_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x) { + return _simd_sin_d2(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_double2 __sin_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x) { + return __sin_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_sin(simd_double2 x) { + return simd_make_double2(sin(x.x), sin(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_sin(simd_double3 x) { + return simd_make_double3(__tg_sin(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_sin_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_sin(simd_double4 x) { + return _simd_sin_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_sin(simd_double4 x) { + return simd_make_double4(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_sin_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_sin(simd_double8 x) { + return _simd_sin_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_sin(simd_double8 x) { + return simd_make_double8(__tg_sin(x.lo), __tg_sin(x.hi)); +} +#endif + +static inline SIMD_CFUNC simd_float2 __tg_cos(simd_float2 x) { + return simd_make_float2(__tg_cos(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cos(simd_float3 x) { + return simd_make_float3(__tg_cos(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cos_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x) { + return _simd_cos_f4(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_float4 __cos_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x) { + return __cos_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cos(simd_float4 x) { + return simd_make_float4(cos(x.x), cos(x.y), cos(x.z), cos(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cos_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cos(simd_float8 x) { + return _simd_cos_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cos(simd_float8 x) { + return simd_make_float8(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cos_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cos(simd_float16 x) { + return _simd_cos_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cos(simd_float16 x) { + return simd_make_float16(__tg_cos(x.lo), 
__tg_cos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cos_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x) { + return _simd_cos_d2(x); +} +#elif SIMD_LIBRARY_VERSION == 1 +extern simd_double2 __cos_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x) { + return __cos_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cos(simd_double2 x) { + return simd_make_double2(cos(x.x), cos(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cos(simd_double3 x) { + return simd_make_double3(__tg_cos(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cos_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cos(simd_double4 x) { + return _simd_cos_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cos(simd_double4 x) { + return simd_make_double4(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cos_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cos(simd_double8 x) { + return _simd_cos_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cos(simd_double8 x) { + return simd_make_double8(__tg_cos(x.lo), __tg_cos(x.hi)); +} +#endif + + +#pragma mark - acos implementation +static inline SIMD_CFUNC simd_float2 __tg_acos(simd_float2 x) { + return simd_make_float2(__tg_acos(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_acos(simd_float3 x) { + return simd_make_float3(__tg_acos(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_acos_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_acos(simd_float4 x) { + return _simd_acos_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_acos(simd_float4 x) { + return simd_make_float4(acos(x.x), acos(x.y), acos(x.z), acos(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_acos_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_acos(simd_float8 x) { + return _simd_acos_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_acos(simd_float8 x) { + return simd_make_float8(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_acos_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_acos(simd_float16 x) { + return _simd_acos_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_acos(simd_float16 x) { + return simd_make_float16(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_acos_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_acos(simd_double2 x) { + return _simd_acos_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_acos(simd_double2 x) { + return simd_make_double2(acos(x.x), acos(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_acos(simd_double3 x) { + return simd_make_double3(__tg_acos(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_acos_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_acos(simd_double4 x) { + return _simd_acos_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_acos(simd_double4 x) { + return simd_make_double4(__tg_acos(x.lo), __tg_acos(x.hi)); +} 
+#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_acos_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_acos(simd_double8 x) { + return _simd_acos_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_acos(simd_double8 x) { + return simd_make_double8(__tg_acos(x.lo), __tg_acos(x.hi)); +} +#endif + +#pragma mark - asin implementation +static inline SIMD_CFUNC simd_float2 __tg_asin(simd_float2 x) { + return simd_make_float2(__tg_asin(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_asin(simd_float3 x) { + return simd_make_float3(__tg_asin(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_asin_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_asin(simd_float4 x) { + return _simd_asin_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_asin(simd_float4 x) { + return simd_make_float4(asin(x.x), asin(x.y), asin(x.z), asin(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_asin_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_asin(simd_float8 x) { + return _simd_asin_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_asin(simd_float8 x) { + return simd_make_float8(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_asin_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_asin(simd_float16 x) { + return _simd_asin_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_asin(simd_float16 x) { + return simd_make_float16(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_asin_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_asin(simd_double2 x) { + return _simd_asin_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_asin(simd_double2 x) { + return simd_make_double2(asin(x.x), asin(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_asin(simd_double3 x) { + return simd_make_double3(__tg_asin(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_asin_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_asin(simd_double4 x) { + return _simd_asin_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_asin(simd_double4 x) { + return simd_make_double4(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_asin_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_asin(simd_double8 x) { + return _simd_asin_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_asin(simd_double8 x) { + return simd_make_double8(__tg_asin(x.lo), __tg_asin(x.hi)); +} +#endif + +#pragma mark - atan implementation +static inline SIMD_CFUNC simd_float2 __tg_atan(simd_float2 x) { + return simd_make_float2(__tg_atan(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_atan(simd_float3 x) { + return simd_make_float3(__tg_atan(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_atan_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_atan(simd_float4 x) { + return _simd_atan_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_atan(simd_float4 x) { + return simd_make_float4(atan(x.x), atan(x.y), atan(x.z), 
atan(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_atan_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_atan(simd_float8 x) { + return _simd_atan_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_atan(simd_float8 x) { + return simd_make_float8(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_atan_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_atan(simd_float16 x) { + return _simd_atan_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_atan(simd_float16 x) { + return simd_make_float16(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_atan_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_atan(simd_double2 x) { + return _simd_atan_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_atan(simd_double2 x) { + return simd_make_double2(atan(x.x), atan(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_atan(simd_double3 x) { + return simd_make_double3(__tg_atan(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_atan_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_atan(simd_double4 x) { + return _simd_atan_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_atan(simd_double4 x) { + return simd_make_double4(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_atan_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_atan(simd_double8 x) { + return _simd_atan_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_atan(simd_double8 x) { + return simd_make_double8(__tg_atan(x.lo), __tg_atan(x.hi)); +} +#endif + +#pragma mark - tan implementation +static inline SIMD_CFUNC simd_float2 __tg_tan(simd_float2 x) { + return simd_make_float2(__tg_tan(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tan(simd_float3 x) { + return simd_make_float3(__tg_tan(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tan_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_tan(simd_float4 x) { + return _simd_tan_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tan(simd_float4 x) { + return simd_make_float4(tan(x.x), tan(x.y), tan(x.z), tan(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tan_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tan(simd_float8 x) { + return _simd_tan_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tan(simd_float8 x) { + return simd_make_float8(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tan_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tan(simd_float16 x) { + return _simd_tan_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tan(simd_float16 x) { + return simd_make_float16(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tan_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tan(simd_double2 x) { + return _simd_tan_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tan(simd_double2 x) { + 
return simd_make_double2(tan(x.x), tan(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tan(simd_double3 x) { + return simd_make_double3(__tg_tan(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tan_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tan(simd_double4 x) { + return _simd_tan_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tan(simd_double4 x) { + return simd_make_double4(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tan_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tan(simd_double8 x) { + return _simd_tan_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tan(simd_double8 x) { + return simd_make_double8(__tg_tan(x.lo), __tg_tan(x.hi)); +} +#endif + +#pragma mark - cospi implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_cospi(simd_float2 x) { + return simd_make_float2(__tg_cospi(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cospi(simd_float3 x) { + return simd_make_float3(__tg_cospi(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cospi_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cospi(simd_float4 x) { + return _simd_cospi_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cospi(simd_float4 x) { + return simd_make_float4(__cospi(x.x), __cospi(x.y), __cospi(x.z), __cospi(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cospi_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cospi(simd_float8 x) { + return _simd_cospi_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cospi(simd_float8 x) { + return simd_make_float8(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cospi_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cospi(simd_float16 x) { + return _simd_cospi_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cospi(simd_float16 x) { + return simd_make_float16(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cospi_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cospi(simd_double2 x) { + return _simd_cospi_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cospi(simd_double2 x) { + return simd_make_double2(__cospi(x.x), __cospi(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cospi(simd_double3 x) { + return simd_make_double3(__tg_cospi(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cospi_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cospi(simd_double4 x) { + return _simd_cospi_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cospi(simd_double4 x) { + return simd_make_double4(__tg_cospi(x.lo), __tg_cospi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cospi_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cospi(simd_double8 x) { + return _simd_cospi_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cospi(simd_double8 x) { + return simd_make_double8(__tg_cospi(x.lo), 
__tg_cospi(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - sinpi implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_sinpi(simd_float2 x) { + return simd_make_float2(__tg_sinpi(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_sinpi(simd_float3 x) { + return simd_make_float3(__tg_sinpi(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_sinpi_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sinpi(simd_float4 x) { + return _simd_sinpi_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_sinpi(simd_float4 x) { + return simd_make_float4(__sinpi(x.x), __sinpi(x.y), __sinpi(x.z), __sinpi(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_sinpi_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_sinpi(simd_float8 x) { + return _simd_sinpi_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_sinpi(simd_float8 x) { + return simd_make_float8(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_sinpi_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_sinpi(simd_float16 x) { + return _simd_sinpi_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_sinpi(simd_float16 x) { + return simd_make_float16(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_sinpi_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sinpi(simd_double2 x) { + return _simd_sinpi_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_sinpi(simd_double2 x) { + return simd_make_double2(__sinpi(x.x), __sinpi(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_sinpi(simd_double3 x) { + return simd_make_double3(__tg_sinpi(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_sinpi_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_sinpi(simd_double4 x) { + return _simd_sinpi_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_sinpi(simd_double4 x) { + return simd_make_double4(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_sinpi_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_sinpi(simd_double8 x) { + return _simd_sinpi_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_sinpi(simd_double8 x) { + return simd_make_double8(__tg_sinpi(x.lo), __tg_sinpi(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - tanpi implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_tanpi(simd_float2 x) { + return simd_make_float2(__tg_tanpi(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tanpi(simd_float3 x) { + return simd_make_float3(__tg_tanpi(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tanpi_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_tanpi(simd_float4 x) { + return _simd_tanpi_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tanpi(simd_float4 x) { + return simd_make_float4(__tanpi(x.x), __tanpi(x.y), __tanpi(x.z), __tanpi(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tanpi_f8(simd_float8 x); 
+static inline SIMD_CFUNC simd_float8 __tg_tanpi(simd_float8 x) { + return _simd_tanpi_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tanpi(simd_float8 x) { + return simd_make_float8(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tanpi_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tanpi(simd_float16 x) { + return _simd_tanpi_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tanpi(simd_float16 x) { + return simd_make_float16(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tanpi_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tanpi(simd_double2 x) { + return _simd_tanpi_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tanpi(simd_double2 x) { + return simd_make_double2(__tanpi(x.x), __tanpi(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tanpi(simd_double3 x) { + return simd_make_double3(__tg_tanpi(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tanpi_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tanpi(simd_double4 x) { + return _simd_tanpi_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tanpi(simd_double4 x) { + return simd_make_double4(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tanpi_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tanpi(simd_double8 x) { + return _simd_tanpi_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tanpi(simd_double8 x) { + return simd_make_double8(__tg_tanpi(x.lo), __tg_tanpi(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - acosh implementation +static inline SIMD_CFUNC simd_float2 __tg_acosh(simd_float2 x) { + return simd_make_float2(__tg_acosh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_acosh(simd_float3 x) { + return simd_make_float3(__tg_acosh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_acosh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_acosh(simd_float4 x) { + return _simd_acosh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_acosh(simd_float4 x) { + return simd_make_float4(acosh(x.x), acosh(x.y), acosh(x.z), acosh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_acosh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_acosh(simd_float8 x) { + return _simd_acosh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_acosh(simd_float8 x) { + return simd_make_float8(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_acosh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_acosh(simd_float16 x) { + return _simd_acosh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_acosh(simd_float16 x) { + return simd_make_float16(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_acosh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_acosh(simd_double2 x) { + return _simd_acosh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_acosh(simd_double2 x) { + return 
simd_make_double2(acosh(x.x), acosh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_acosh(simd_double3 x) { + return simd_make_double3(__tg_acosh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_acosh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_acosh(simd_double4 x) { + return _simd_acosh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_acosh(simd_double4 x) { + return simd_make_double4(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_acosh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_acosh(simd_double8 x) { + return _simd_acosh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_acosh(simd_double8 x) { + return simd_make_double8(__tg_acosh(x.lo), __tg_acosh(x.hi)); +} +#endif + +#pragma mark - asinh implementation +static inline SIMD_CFUNC simd_float2 __tg_asinh(simd_float2 x) { + return simd_make_float2(__tg_asinh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_asinh(simd_float3 x) { + return simd_make_float3(__tg_asinh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_asinh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_asinh(simd_float4 x) { + return _simd_asinh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_asinh(simd_float4 x) { + return simd_make_float4(asinh(x.x), asinh(x.y), asinh(x.z), asinh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_asinh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_asinh(simd_float8 x) { + return _simd_asinh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_asinh(simd_float8 x) { + return simd_make_float8(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_asinh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_asinh(simd_float16 x) { + return _simd_asinh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_asinh(simd_float16 x) { + return simd_make_float16(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_asinh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_asinh(simd_double2 x) { + return _simd_asinh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_asinh(simd_double2 x) { + return simd_make_double2(asinh(x.x), asinh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_asinh(simd_double3 x) { + return simd_make_double3(__tg_asinh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_asinh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_asinh(simd_double4 x) { + return _simd_asinh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_asinh(simd_double4 x) { + return simd_make_double4(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_asinh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_asinh(simd_double8 x) { + return _simd_asinh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_asinh(simd_double8 x) { + return simd_make_double8(__tg_asinh(x.lo), __tg_asinh(x.hi)); +} +#endif + 
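+/*  Dispatch pattern shared by the math functions in this header: the two-
+ *  and three-element overloads widen their argument to a four-element
+ *  vector and recurse; the four-element and wider overloads call a
+ *  vectorized libsimd kernel (_simd_<name>_<f|d><lanes>) when
+ *  SIMD_LIBRARY_VERSION >= 3, with the 8- and 16-lane kernels additionally
+ *  gated on __x86_64__ plus __AVX2__ or __AVX512F__.  In all other
+ *  configurations the wide overloads split into lo/hi halves and the
+ *  narrowest overload falls back to per-lane libm calls; e.g.
+ *  __tg_sinh(simd_float8 x) becomes _simd_sinh_f8(x) when the kernel is
+ *  available and simd_make_float8(__tg_sinh(x.lo), __tg_sinh(x.hi))
+ *  otherwise.  Some functions (sin, cos) add a SIMD_LIBRARY_VERSION == 1
+ *  branch that calls the older __<name>_f4/_d2 entry points, and the
+ *  cospi/sinpi/tanpi/exp10 families are only defined at all for
+ *  SIMD_LIBRARY_VERSION >= 1.  */
+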
+#pragma mark - atanh implementation +static inline SIMD_CFUNC simd_float2 __tg_atanh(simd_float2 x) { + return simd_make_float2(__tg_atanh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_atanh(simd_float3 x) { + return simd_make_float3(__tg_atanh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_atanh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_atanh(simd_float4 x) { + return _simd_atanh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_atanh(simd_float4 x) { + return simd_make_float4(atanh(x.x), atanh(x.y), atanh(x.z), atanh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_atanh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_atanh(simd_float8 x) { + return _simd_atanh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_atanh(simd_float8 x) { + return simd_make_float8(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_atanh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_atanh(simd_float16 x) { + return _simd_atanh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_atanh(simd_float16 x) { + return simd_make_float16(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_atanh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_atanh(simd_double2 x) { + return _simd_atanh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_atanh(simd_double2 x) { + return simd_make_double2(atanh(x.x), atanh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_atanh(simd_double3 x) { + return simd_make_double3(__tg_atanh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_atanh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_atanh(simd_double4 x) { + return _simd_atanh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_atanh(simd_double4 x) { + return simd_make_double4(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_atanh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_atanh(simd_double8 x) { + return _simd_atanh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_atanh(simd_double8 x) { + return simd_make_double8(__tg_atanh(x.lo), __tg_atanh(x.hi)); +} +#endif + +#pragma mark - cosh implementation +static inline SIMD_CFUNC simd_float2 __tg_cosh(simd_float2 x) { + return simd_make_float2(__tg_cosh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cosh(simd_float3 x) { + return simd_make_float3(__tg_cosh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cosh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cosh(simd_float4 x) { + return _simd_cosh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cosh(simd_float4 x) { + return simd_make_float4(cosh(x.x), cosh(x.y), cosh(x.z), cosh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cosh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cosh(simd_float8 x) { + return _simd_cosh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cosh(simd_float8 x) { + return 
simd_make_float8(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cosh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cosh(simd_float16 x) { + return _simd_cosh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cosh(simd_float16 x) { + return simd_make_float16(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cosh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cosh(simd_double2 x) { + return _simd_cosh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cosh(simd_double2 x) { + return simd_make_double2(cosh(x.x), cosh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cosh(simd_double3 x) { + return simd_make_double3(__tg_cosh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cosh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cosh(simd_double4 x) { + return _simd_cosh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cosh(simd_double4 x) { + return simd_make_double4(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cosh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cosh(simd_double8 x) { + return _simd_cosh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cosh(simd_double8 x) { + return simd_make_double8(__tg_cosh(x.lo), __tg_cosh(x.hi)); +} +#endif + +#pragma mark - sinh implementation +static inline SIMD_CFUNC simd_float2 __tg_sinh(simd_float2 x) { + return simd_make_float2(__tg_sinh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_sinh(simd_float3 x) { + return simd_make_float3(__tg_sinh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_sinh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_sinh(simd_float4 x) { + return _simd_sinh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_sinh(simd_float4 x) { + return simd_make_float4(sinh(x.x), sinh(x.y), sinh(x.z), sinh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_sinh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_sinh(simd_float8 x) { + return _simd_sinh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_sinh(simd_float8 x) { + return simd_make_float8(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_sinh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_sinh(simd_float16 x) { + return _simd_sinh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_sinh(simd_float16 x) { + return simd_make_float16(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_sinh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_sinh(simd_double2 x) { + return _simd_sinh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_sinh(simd_double2 x) { + return simd_make_double2(sinh(x.x), sinh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_sinh(simd_double3 x) { + return simd_make_double3(__tg_sinh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 
_simd_sinh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_sinh(simd_double4 x) { + return _simd_sinh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_sinh(simd_double4 x) { + return simd_make_double4(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_sinh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_sinh(simd_double8 x) { + return _simd_sinh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_sinh(simd_double8 x) { + return simd_make_double8(__tg_sinh(x.lo), __tg_sinh(x.hi)); +} +#endif + +#pragma mark - tanh implementation +static inline SIMD_CFUNC simd_float2 __tg_tanh(simd_float2 x) { + return simd_make_float2(__tg_tanh(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tanh(simd_float3 x) { + return simd_make_float3(__tg_tanh(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tanh_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_tanh(simd_float4 x) { + return _simd_tanh_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tanh(simd_float4 x) { + return simd_make_float4(tanh(x.x), tanh(x.y), tanh(x.z), tanh(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tanh_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tanh(simd_float8 x) { + return _simd_tanh_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tanh(simd_float8 x) { + return simd_make_float8(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tanh_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tanh(simd_float16 x) { + return _simd_tanh_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tanh(simd_float16 x) { + return simd_make_float16(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tanh_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tanh(simd_double2 x) { + return _simd_tanh_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tanh(simd_double2 x) { + return simd_make_double2(tanh(x.x), tanh(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tanh(simd_double3 x) { + return simd_make_double3(__tg_tanh(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tanh_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tanh(simd_double4 x) { + return _simd_tanh_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tanh(simd_double4 x) { + return simd_make_double4(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tanh_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tanh(simd_double8 x) { + return _simd_tanh_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tanh(simd_double8 x) { + return simd_make_double8(__tg_tanh(x.lo), __tg_tanh(x.hi)); +} +#endif + +#pragma mark - exp implementation +static inline SIMD_CFUNC simd_float2 __tg_exp(simd_float2 x) { + return simd_make_float2(__tg_exp(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_exp(simd_float3 x) { + return simd_make_float3(__tg_exp(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 
_simd_exp_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_exp(simd_float4 x) { + return _simd_exp_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_exp(simd_float4 x) { + return simd_make_float4(exp(x.x), exp(x.y), exp(x.z), exp(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_exp_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_exp(simd_float8 x) { + return _simd_exp_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_exp(simd_float8 x) { + return simd_make_float8(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_exp_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_exp(simd_float16 x) { + return _simd_exp_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_exp(simd_float16 x) { + return simd_make_float16(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_exp_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_exp(simd_double2 x) { + return _simd_exp_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_exp(simd_double2 x) { + return simd_make_double2(exp(x.x), exp(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_exp(simd_double3 x) { + return simd_make_double3(__tg_exp(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_exp_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_exp(simd_double4 x) { + return _simd_exp_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_exp(simd_double4 x) { + return simd_make_double4(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_exp_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_exp(simd_double8 x) { + return _simd_exp_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_exp(simd_double8 x) { + return simd_make_double8(__tg_exp(x.lo), __tg_exp(x.hi)); +} +#endif + +#pragma mark - exp2 implementation +static inline SIMD_CFUNC simd_float2 __tg_exp2(simd_float2 x) { + return simd_make_float2(__tg_exp2(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_exp2(simd_float3 x) { + return simd_make_float3(__tg_exp2(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_exp2_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_exp2(simd_float4 x) { + return _simd_exp2_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_exp2(simd_float4 x) { + return simd_make_float4(exp2(x.x), exp2(x.y), exp2(x.z), exp2(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_exp2_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_exp2(simd_float8 x) { + return _simd_exp2_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_exp2(simd_float8 x) { + return simd_make_float8(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_exp2_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_exp2(simd_float16 x) { + return _simd_exp2_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_exp2(simd_float16 x) { + return simd_make_float16(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#if 
SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_exp2_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_exp2(simd_double2 x) { + return _simd_exp2_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_exp2(simd_double2 x) { + return simd_make_double2(exp2(x.x), exp2(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_exp2(simd_double3 x) { + return simd_make_double3(__tg_exp2(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_exp2_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_exp2(simd_double4 x) { + return _simd_exp2_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_exp2(simd_double4 x) { + return simd_make_double4(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_exp2_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_exp2(simd_double8 x) { + return _simd_exp2_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_exp2(simd_double8 x) { + return simd_make_double8(__tg_exp2(x.lo), __tg_exp2(x.hi)); +} +#endif + +#pragma mark - exp10 implementation +#if SIMD_LIBRARY_VERSION >= 1 +static inline SIMD_CFUNC simd_float2 __tg_exp10(simd_float2 x) { + return simd_make_float2(__tg_exp10(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_exp10(simd_float3 x) { + return simd_make_float3(__tg_exp10(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_exp10_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_exp10(simd_float4 x) { + return _simd_exp10_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_exp10(simd_float4 x) { + return simd_make_float4(__exp10(x.x), __exp10(x.y), __exp10(x.z), __exp10(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_exp10_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_exp10(simd_float8 x) { + return _simd_exp10_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_exp10(simd_float8 x) { + return simd_make_float8(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_exp10_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_exp10(simd_float16 x) { + return _simd_exp10_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_exp10(simd_float16 x) { + return simd_make_float16(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_exp10_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_exp10(simd_double2 x) { + return _simd_exp10_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_exp10(simd_double2 x) { + return simd_make_double2(__exp10(x.x), __exp10(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_exp10(simd_double3 x) { + return simd_make_double3(__tg_exp10(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_exp10_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_exp10(simd_double4 x) { + return _simd_exp10_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_exp10(simd_double4 x) { + return simd_make_double4(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern 
simd_double8 _simd_exp10_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_exp10(simd_double8 x) { + return _simd_exp10_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_exp10(simd_double8 x) { + return simd_make_double8(__tg_exp10(x.lo), __tg_exp10(x.hi)); +} +#endif + +#endif /* SIMD_LIBRARY_VERSION */ +#pragma mark - expm1 implementation +static inline SIMD_CFUNC simd_float2 __tg_expm1(simd_float2 x) { + return simd_make_float2(__tg_expm1(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_expm1(simd_float3 x) { + return simd_make_float3(__tg_expm1(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_expm1_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_expm1(simd_float4 x) { + return _simd_expm1_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_expm1(simd_float4 x) { + return simd_make_float4(expm1(x.x), expm1(x.y), expm1(x.z), expm1(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_expm1_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_expm1(simd_float8 x) { + return _simd_expm1_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_expm1(simd_float8 x) { + return simd_make_float8(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_expm1_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_expm1(simd_float16 x) { + return _simd_expm1_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_expm1(simd_float16 x) { + return simd_make_float16(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_expm1_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_expm1(simd_double2 x) { + return _simd_expm1_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_expm1(simd_double2 x) { + return simd_make_double2(expm1(x.x), expm1(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_expm1(simd_double3 x) { + return simd_make_double3(__tg_expm1(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_expm1_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_expm1(simd_double4 x) { + return _simd_expm1_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_expm1(simd_double4 x) { + return simd_make_double4(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_expm1_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_expm1(simd_double8 x) { + return _simd_expm1_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_expm1(simd_double8 x) { + return simd_make_double8(__tg_expm1(x.lo), __tg_expm1(x.hi)); +} +#endif + +#pragma mark - log implementation +static inline SIMD_CFUNC simd_float2 __tg_log(simd_float2 x) { + return simd_make_float2(__tg_log(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log(simd_float3 x) { + return simd_make_float3(__tg_log(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log(simd_float4 x) { + return _simd_log_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log(simd_float4 x) { + return simd_make_float4(log(x.x), log(x.y), log(x.z), log(x.w)); +} +#endif + +#if 
SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log(simd_float8 x) { + return _simd_log_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log(simd_float8 x) { + return simd_make_float8(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log(simd_float16 x) { + return _simd_log_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log(simd_float16 x) { + return simd_make_float16(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log(simd_double2 x) { + return _simd_log_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log(simd_double2 x) { + return simd_make_double2(log(x.x), log(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log(simd_double3 x) { + return simd_make_double3(__tg_log(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log(simd_double4 x) { + return _simd_log_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log(simd_double4 x) { + return simd_make_double4(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log(simd_double8 x) { + return _simd_log_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log(simd_double8 x) { + return simd_make_double8(__tg_log(x.lo), __tg_log(x.hi)); +} +#endif + +#pragma mark - log2 implementation +static inline SIMD_CFUNC simd_float2 __tg_log2(simd_float2 x) { + return simd_make_float2(__tg_log2(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log2(simd_float3 x) { + return simd_make_float3(__tg_log2(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log2_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log2(simd_float4 x) { + return _simd_log2_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log2(simd_float4 x) { + return simd_make_float4(log2(x.x), log2(x.y), log2(x.z), log2(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log2_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log2(simd_float8 x) { + return _simd_log2_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log2(simd_float8 x) { + return simd_make_float8(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log2_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log2(simd_float16 x) { + return _simd_log2_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log2(simd_float16 x) { + return simd_make_float16(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log2_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log2(simd_double2 x) { + return _simd_log2_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log2(simd_double2 x) { + return 
simd_make_double2(log2(x.x), log2(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log2(simd_double3 x) { + return simd_make_double3(__tg_log2(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log2_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log2(simd_double4 x) { + return _simd_log2_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log2(simd_double4 x) { + return simd_make_double4(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log2_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log2(simd_double8 x) { + return _simd_log2_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log2(simd_double8 x) { + return simd_make_double8(__tg_log2(x.lo), __tg_log2(x.hi)); +} +#endif + +#pragma mark - log10 implementation +static inline SIMD_CFUNC simd_float2 __tg_log10(simd_float2 x) { + return simd_make_float2(__tg_log10(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log10(simd_float3 x) { + return simd_make_float3(__tg_log10(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log10_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log10(simd_float4 x) { + return _simd_log10_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log10(simd_float4 x) { + return simd_make_float4(log10(x.x), log10(x.y), log10(x.z), log10(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log10_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log10(simd_float8 x) { + return _simd_log10_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log10(simd_float8 x) { + return simd_make_float8(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log10_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log10(simd_float16 x) { + return _simd_log10_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log10(simd_float16 x) { + return simd_make_float16(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log10_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log10(simd_double2 x) { + return _simd_log10_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log10(simd_double2 x) { + return simd_make_double2(log10(x.x), log10(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log10(simd_double3 x) { + return simd_make_double3(__tg_log10(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log10_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log10(simd_double4 x) { + return _simd_log10_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log10(simd_double4 x) { + return simd_make_double4(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log10_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log10(simd_double8 x) { + return _simd_log10_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log10(simd_double8 x) { + return simd_make_double8(__tg_log10(x.lo), __tg_log10(x.hi)); +} +#endif + +#pragma mark - 
log1p implementation +static inline SIMD_CFUNC simd_float2 __tg_log1p(simd_float2 x) { + return simd_make_float2(__tg_log1p(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_log1p(simd_float3 x) { + return simd_make_float3(__tg_log1p(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_log1p_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_log1p(simd_float4 x) { + return _simd_log1p_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_log1p(simd_float4 x) { + return simd_make_float4(log1p(x.x), log1p(x.y), log1p(x.z), log1p(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_log1p_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_log1p(simd_float8 x) { + return _simd_log1p_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_log1p(simd_float8 x) { + return simd_make_float8(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_log1p_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_log1p(simd_float16 x) { + return _simd_log1p_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_log1p(simd_float16 x) { + return simd_make_float16(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_log1p_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_log1p(simd_double2 x) { + return _simd_log1p_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_log1p(simd_double2 x) { + return simd_make_double2(log1p(x.x), log1p(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_log1p(simd_double3 x) { + return simd_make_double3(__tg_log1p(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_log1p_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_log1p(simd_double4 x) { + return _simd_log1p_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_log1p(simd_double4 x) { + return simd_make_double4(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_log1p_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_log1p(simd_double8 x) { + return _simd_log1p_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_log1p(simd_double8 x) { + return simd_make_double8(__tg_log1p(x.lo), __tg_log1p(x.hi)); +} +#endif + +#pragma mark - cbrt implementation +static inline SIMD_CFUNC simd_float2 __tg_cbrt(simd_float2 x) { + return simd_make_float2(__tg_cbrt(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_cbrt(simd_float3 x) { + return simd_make_float3(__tg_cbrt(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_cbrt_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_cbrt(simd_float4 x) { + return _simd_cbrt_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_cbrt(simd_float4 x) { + return simd_make_float4(cbrt(x.x), cbrt(x.y), cbrt(x.z), cbrt(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_cbrt_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_cbrt(simd_float8 x) { + return _simd_cbrt_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_cbrt(simd_float8 x) { + return simd_make_float8(__tg_cbrt(x.lo), 
__tg_cbrt(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_cbrt_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_cbrt(simd_float16 x) { + return _simd_cbrt_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_cbrt(simd_float16 x) { + return simd_make_float16(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_cbrt_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_cbrt(simd_double2 x) { + return _simd_cbrt_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_cbrt(simd_double2 x) { + return simd_make_double2(cbrt(x.x), cbrt(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_cbrt(simd_double3 x) { + return simd_make_double3(__tg_cbrt(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_cbrt_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_cbrt(simd_double4 x) { + return _simd_cbrt_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_cbrt(simd_double4 x) { + return simd_make_double4(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_cbrt_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_cbrt(simd_double8 x) { + return _simd_cbrt_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_cbrt(simd_double8 x) { + return simd_make_double8(__tg_cbrt(x.lo), __tg_cbrt(x.hi)); +} +#endif + +#pragma mark - erf implementation +static inline SIMD_CFUNC simd_float2 __tg_erf(simd_float2 x) { + return simd_make_float2(__tg_erf(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_erf(simd_float3 x) { + return simd_make_float3(__tg_erf(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_erf_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_erf(simd_float4 x) { + return _simd_erf_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_erf(simd_float4 x) { + return simd_make_float4(erf(x.x), erf(x.y), erf(x.z), erf(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_erf_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_erf(simd_float8 x) { + return _simd_erf_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_erf(simd_float8 x) { + return simd_make_float8(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_erf_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_erf(simd_float16 x) { + return _simd_erf_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_erf(simd_float16 x) { + return simd_make_float16(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_erf_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_erf(simd_double2 x) { + return _simd_erf_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_erf(simd_double2 x) { + return simd_make_double2(erf(x.x), erf(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_erf(simd_double3 x) { + return simd_make_double3(__tg_erf(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_erf_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 
__tg_erf(simd_double4 x) { + return _simd_erf_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_erf(simd_double4 x) { + return simd_make_double4(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_erf_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_erf(simd_double8 x) { + return _simd_erf_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_erf(simd_double8 x) { + return simd_make_double8(__tg_erf(x.lo), __tg_erf(x.hi)); +} +#endif + +#pragma mark - erfc implementation +static inline SIMD_CFUNC simd_float2 __tg_erfc(simd_float2 x) { + return simd_make_float2(__tg_erfc(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_erfc(simd_float3 x) { + return simd_make_float3(__tg_erfc(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_erfc_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_erfc(simd_float4 x) { + return _simd_erfc_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_erfc(simd_float4 x) { + return simd_make_float4(erfc(x.x), erfc(x.y), erfc(x.z), erfc(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_erfc_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_erfc(simd_float8 x) { + return _simd_erfc_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_erfc(simd_float8 x) { + return simd_make_float8(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_erfc_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_erfc(simd_float16 x) { + return _simd_erfc_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_erfc(simd_float16 x) { + return simd_make_float16(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_erfc_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_erfc(simd_double2 x) { + return _simd_erfc_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_erfc(simd_double2 x) { + return simd_make_double2(erfc(x.x), erfc(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_erfc(simd_double3 x) { + return simd_make_double3(__tg_erfc(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_erfc_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_erfc(simd_double4 x) { + return _simd_erfc_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_erfc(simd_double4 x) { + return simd_make_double4(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_erfc_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_erfc(simd_double8 x) { + return _simd_erfc_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_erfc(simd_double8 x) { + return simd_make_double8(__tg_erfc(x.lo), __tg_erfc(x.hi)); +} +#endif + +#pragma mark - tgamma implementation +static inline SIMD_CFUNC simd_float2 __tg_tgamma(simd_float2 x) { + return simd_make_float2(__tg_tgamma(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_tgamma(simd_float3 x) { + return simd_make_float3(__tg_tgamma(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_tgamma_f4(simd_float4 x); +static inline SIMD_CFUNC 
simd_float4 __tg_tgamma(simd_float4 x) { + return _simd_tgamma_f4(x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_tgamma(simd_float4 x) { + return simd_make_float4(tgamma(x.x), tgamma(x.y), tgamma(x.z), tgamma(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_tgamma_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_tgamma(simd_float8 x) { + return _simd_tgamma_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_tgamma(simd_float8 x) { + return simd_make_float8(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_tgamma_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_tgamma(simd_float16 x) { + return _simd_tgamma_f16(x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_tgamma(simd_float16 x) { + return simd_make_float16(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_tgamma_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_tgamma(simd_double2 x) { + return _simd_tgamma_d2(x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_tgamma(simd_double2 x) { + return simd_make_double2(tgamma(x.x), tgamma(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_tgamma(simd_double3 x) { + return simd_make_double3(__tg_tgamma(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_tgamma_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_tgamma(simd_double4 x) { + return _simd_tgamma_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_tgamma(simd_double4 x) { + return simd_make_double4(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_tgamma_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_tgamma(simd_double8 x) { + return _simd_tgamma_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_tgamma(simd_double8 x) { + return simd_make_double8(__tg_tgamma(x.lo), __tg_tgamma(x.hi)); +} +#endif + +#pragma mark - round implementation +static inline SIMD_CFUNC simd_float2 __tg_round(simd_float2 x) { + return simd_make_float2(__tg_round(simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_round(simd_float3 x) { + return simd_make_float3(__tg_round(simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_round_f4(simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_round(simd_float4 x) { +#if defined __arm64__ + return vrndaq_f32(x); +#else + return _simd_round_f4(x); +#endif +} +#else +static inline SIMD_CFUNC simd_float4 __tg_round(simd_float4 x) { + return simd_make_float4(round(x.x), round(x.y), round(x.z), round(x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_round_f8(simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_round(simd_float8 x) { + return _simd_round_f8(x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_round(simd_float8 x) { + return simd_make_float8(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_round_f16(simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_round(simd_float16 x) { + return _simd_round_f16(x); +} +#else +static 
inline SIMD_CFUNC simd_float16 __tg_round(simd_float16 x) { + return simd_make_float16(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_round_d2(simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_round(simd_double2 x) { +#if defined __arm64__ + return vrndaq_f64(x); +#else + return _simd_round_d2(x); +#endif +} +#else +static inline SIMD_CFUNC simd_double2 __tg_round(simd_double2 x) { + return simd_make_double2(round(x.x), round(x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_round(simd_double3 x) { + return simd_make_double3(__tg_round(simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_round_d4(simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_round(simd_double4 x) { + return _simd_round_d4(x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_round(simd_double4 x) { + return simd_make_double4(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_round_d8(simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_round(simd_double8 x) { + return _simd_round_d8(x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_round(simd_double8 x) { + return simd_make_double8(__tg_round(x.lo), __tg_round(x.hi)); +} +#endif + +#pragma mark - atan2 implementation +static inline SIMD_CFUNC simd_float2 __tg_atan2(simd_float2 y, simd_float2 x) { + return simd_make_float2(__tg_atan2(simd_make_float4(y), simd_make_float4(x))); +} + +static inline SIMD_CFUNC simd_float3 __tg_atan2(simd_float3 y, simd_float3 x) { + return simd_make_float3(__tg_atan2(simd_make_float4(y), simd_make_float4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_atan2_f4(simd_float4 y, simd_float4 x); +static inline SIMD_CFUNC simd_float4 __tg_atan2(simd_float4 y, simd_float4 x) { + return _simd_atan2_f4(y, x); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_atan2(simd_float4 y, simd_float4 x) { + return simd_make_float4(atan2(y.x, x.x), atan2(y.y, x.y), atan2(y.z, x.z), atan2(y.w, x.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_atan2_f8(simd_float8 y, simd_float8 x); +static inline SIMD_CFUNC simd_float8 __tg_atan2(simd_float8 y, simd_float8 x) { + return _simd_atan2_f8(y, x); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_atan2(simd_float8 y, simd_float8 x) { + return simd_make_float8(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_atan2_f16(simd_float16 y, simd_float16 x); +static inline SIMD_CFUNC simd_float16 __tg_atan2(simd_float16 y, simd_float16 x) { + return _simd_atan2_f16(y, x); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_atan2(simd_float16 y, simd_float16 x) { + return simd_make_float16(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_atan2_d2(simd_double2 y, simd_double2 x); +static inline SIMD_CFUNC simd_double2 __tg_atan2(simd_double2 y, simd_double2 x) { + return _simd_atan2_d2(y, x); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_atan2(simd_double2 y, simd_double2 x) { + return simd_make_double2(atan2(y.x, x.x), atan2(y.y, x.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_atan2(simd_double3 y, simd_double3 x) { + return 
simd_make_double3(__tg_atan2(simd_make_double4(y), simd_make_double4(x))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_atan2_d4(simd_double4 y, simd_double4 x); +static inline SIMD_CFUNC simd_double4 __tg_atan2(simd_double4 y, simd_double4 x) { + return _simd_atan2_d4(y, x); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_atan2(simd_double4 y, simd_double4 x) { + return simd_make_double4(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_atan2_d8(simd_double8 y, simd_double8 x); +static inline SIMD_CFUNC simd_double8 __tg_atan2(simd_double8 y, simd_double8 x) { + return _simd_atan2_d8(y, x); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_atan2(simd_double8 y, simd_double8 x) { + return simd_make_double8(__tg_atan2(y.lo, x.lo), __tg_atan2(y.hi, x.hi)); +} +#endif + +#pragma mark - hypot implementation +static inline SIMD_CFUNC simd_float2 __tg_hypot(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_hypot(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_hypot(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_hypot(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_hypot_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_hypot(simd_float4 x, simd_float4 y) { + return _simd_hypot_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_hypot(simd_float4 x, simd_float4 y) { + return simd_make_float4(hypot(x.x, y.x), hypot(x.y, y.y), hypot(x.z, y.z), hypot(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_hypot_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_hypot(simd_float8 x, simd_float8 y) { + return _simd_hypot_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_hypot(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_hypot_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_hypot(simd_float16 x, simd_float16 y) { + return _simd_hypot_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_hypot(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_hypot_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_hypot(simd_double2 x, simd_double2 y) { + return _simd_hypot_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_hypot(simd_double2 x, simd_double2 y) { + return simd_make_double2(hypot(x.x, y.x), hypot(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_hypot(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_hypot(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_hypot_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_hypot(simd_double4 x, simd_double4 y) { + return _simd_hypot_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_hypot(simd_double4 x, simd_double4 y) { + return 
simd_make_double4(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_hypot_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_hypot(simd_double8 x, simd_double8 y) { + return _simd_hypot_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_hypot(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_hypot(x.lo, y.lo), __tg_hypot(x.hi, y.hi)); +} +#endif + +#pragma mark - pow implementation +static inline SIMD_CFUNC simd_float2 __tg_pow(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_pow(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_pow(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_pow(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_pow_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_pow(simd_float4 x, simd_float4 y) { + return _simd_pow_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_pow(simd_float4 x, simd_float4 y) { + return simd_make_float4(pow(x.x, y.x), pow(x.y, y.y), pow(x.z, y.z), pow(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_pow_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_pow(simd_float8 x, simd_float8 y) { + return _simd_pow_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_pow(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_pow_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_pow(simd_float16 x, simd_float16 y) { + return _simd_pow_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_pow(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_pow_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_pow(simd_double2 x, simd_double2 y) { + return _simd_pow_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_pow(simd_double2 x, simd_double2 y) { + return simd_make_double2(pow(x.x, y.x), pow(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_pow(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_pow(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_pow_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_pow(simd_double4 x, simd_double4 y) { + return _simd_pow_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_pow(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_pow_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_pow(simd_double8 x, simd_double8 y) { + return _simd_pow_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_pow(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_pow(x.lo, y.lo), __tg_pow(x.hi, y.hi)); +} +#endif + +#pragma mark - fmod 
implementation +static inline SIMD_CFUNC simd_float2 __tg_fmod(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_fmod(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_fmod(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_fmod(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_fmod_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_fmod(simd_float4 x, simd_float4 y) { + return _simd_fmod_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_fmod(simd_float4 x, simd_float4 y) { + return simd_make_float4(fmod(x.x, y.x), fmod(x.y, y.y), fmod(x.z, y.z), fmod(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_fmod_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_fmod(simd_float8 x, simd_float8 y) { + return _simd_fmod_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_fmod(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_fmod_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_fmod(simd_float16 x, simd_float16 y) { + return _simd_fmod_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_fmod(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_fmod_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_fmod(simd_double2 x, simd_double2 y) { + return _simd_fmod_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_fmod(simd_double2 x, simd_double2 y) { + return simd_make_double2(fmod(x.x, y.x), fmod(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_fmod(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_fmod(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_fmod_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_fmod(simd_double4 x, simd_double4 y) { + return _simd_fmod_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_fmod(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_fmod_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_fmod(simd_double8 x, simd_double8 y) { + return _simd_fmod_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_fmod(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_fmod(x.lo, y.lo), __tg_fmod(x.hi, y.hi)); +} +#endif + +#pragma mark - remainder implementation +static inline SIMD_CFUNC simd_float2 __tg_remainder(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_remainder(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_remainder(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_remainder(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_remainder_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC 
simd_float4 __tg_remainder(simd_float4 x, simd_float4 y) { + return _simd_remainder_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_remainder(simd_float4 x, simd_float4 y) { + return simd_make_float4(remainder(x.x, y.x), remainder(x.y, y.y), remainder(x.z, y.z), remainder(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_remainder_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_remainder(simd_float8 x, simd_float8 y) { + return _simd_remainder_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_remainder(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_remainder_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_remainder(simd_float16 x, simd_float16 y) { + return _simd_remainder_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_remainder(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_remainder_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_remainder(simd_double2 x, simd_double2 y) { + return _simd_remainder_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_remainder(simd_double2 x, simd_double2 y) { + return simd_make_double2(remainder(x.x, y.x), remainder(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_remainder(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_remainder(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_remainder_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_remainder(simd_double4 x, simd_double4 y) { + return _simd_remainder_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_remainder(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_remainder_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_remainder(simd_double8 x, simd_double8 y) { + return _simd_remainder_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_remainder(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_remainder(x.lo, y.lo), __tg_remainder(x.hi, y.hi)); +} +#endif + +#pragma mark - nextafter implementation +static inline SIMD_CFUNC simd_float2 __tg_nextafter(simd_float2 x, simd_float2 y) { + return simd_make_float2(__tg_nextafter(simd_make_float4(x), simd_make_float4(y))); +} + +static inline SIMD_CFUNC simd_float3 __tg_nextafter(simd_float3 x, simd_float3 y) { + return simd_make_float3(__tg_nextafter(simd_make_float4(x), simd_make_float4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_float4 _simd_nextafter_f4(simd_float4 x, simd_float4 y); +static inline SIMD_CFUNC simd_float4 __tg_nextafter(simd_float4 x, simd_float4 y) { + return _simd_nextafter_f4(x, y); +} +#else +static inline SIMD_CFUNC simd_float4 __tg_nextafter(simd_float4 x, simd_float4 y) { + return simd_make_float4(nextafter(x.x, y.x), nextafter(x.y, y.y), nextafter(x.z, y.z), 
nextafter(x.w, y.w)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_float8 _simd_nextafter_f8(simd_float8 x, simd_float8 y); +static inline SIMD_CFUNC simd_float8 __tg_nextafter(simd_float8 x, simd_float8 y) { + return _simd_nextafter_f8(x, y); +} +#else +static inline SIMD_CFUNC simd_float8 __tg_nextafter(simd_float8 x, simd_float8 y) { + return simd_make_float8(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_float16 _simd_nextafter_f16(simd_float16 x, simd_float16 y); +static inline SIMD_CFUNC simd_float16 __tg_nextafter(simd_float16 x, simd_float16 y) { + return _simd_nextafter_f16(x, y); +} +#else +static inline SIMD_CFUNC simd_float16 __tg_nextafter(simd_float16 x, simd_float16 y) { + return simd_make_float16(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 +extern simd_double2 _simd_nextafter_d2(simd_double2 x, simd_double2 y); +static inline SIMD_CFUNC simd_double2 __tg_nextafter(simd_double2 x, simd_double2 y) { + return _simd_nextafter_d2(x, y); +} +#else +static inline SIMD_CFUNC simd_double2 __tg_nextafter(simd_double2 x, simd_double2 y) { + return simd_make_double2(nextafter(x.x, y.x), nextafter(x.y, y.y)); +} +#endif + +static inline SIMD_CFUNC simd_double3 __tg_nextafter(simd_double3 x, simd_double3 y) { + return simd_make_double3(__tg_nextafter(simd_make_double4(x), simd_make_double4(y))); +} + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX2__ +extern simd_double4 _simd_nextafter_d4(simd_double4 x, simd_double4 y); +static inline SIMD_CFUNC simd_double4 __tg_nextafter(simd_double4 x, simd_double4 y) { + return _simd_nextafter_d4(x, y); +} +#else +static inline SIMD_CFUNC simd_double4 __tg_nextafter(simd_double4 x, simd_double4 y) { + return simd_make_double4(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 3 && defined __x86_64__ && defined __AVX512F__ +extern simd_double8 _simd_nextafter_d8(simd_double8 x, simd_double8 y); +static inline SIMD_CFUNC simd_double8 __tg_nextafter(simd_double8 x, simd_double8 y) { + return _simd_nextafter_d8(x, y); +} +#else +static inline SIMD_CFUNC simd_double8 __tg_nextafter(simd_double8 x, simd_double8 y) { + return simd_make_double8(__tg_nextafter(x.lo, y.lo), __tg_nextafter(x.hi, y.hi)); +} +#endif + +#if SIMD_LIBRARY_VERSION >= 5 +#pragma mark - sincos implementation +static inline SIMD_NONCONST void __tg_sincos(simd_float2 x, simd_float2 *sinp, simd_float2 *cosp) { + simd_float4 sin_val; + simd_float4 cos_val; + __tg_sincos(simd_make_float4(x), &sin_val, &cos_val); + *sinp = simd_make_float2(sin_val); + *cosp = simd_make_float2(cos_val); +} + +static inline SIMD_NONCONST void __tg_sincos(simd_float3 x, simd_float3 *sinp, simd_float3 *cosp) { + simd_float4 sin_val; + simd_float4 cos_val; + __tg_sincos(simd_make_float4(x), &sin_val, &cos_val); + *sinp = simd_make_float3(sin_val); + *cosp = simd_make_float3(cos_val); +} + +extern void _simd_sincos_f4(simd_float4 x, simd_float4 *sinp, simd_float4 *cosp); +static inline SIMD_NONCONST void __tg_sincos(simd_float4 x, simd_float4 *sinp, simd_float4 *cosp) { + return _simd_sincos_f4(x, sinp, cosp); +} + +static inline SIMD_NONCONST void __tg_sincos(simd_float8 x, simd_float8 *sinp, simd_float8 *cosp) { + __tg_sincos(x.lo, (simd_float4 *)sinp+0, (simd_float4 *)cosp+0); + __tg_sincos(x.hi, (simd_float4 *)sinp+1, 
(simd_float4 *)cosp+1); +} + +static inline SIMD_NONCONST void __tg_sincos(simd_float16 x, simd_float16 *sinp, simd_float16 *cosp) { + __tg_sincos(x.lo, (simd_float8 *)sinp+0, (simd_float8 *)cosp+0); + __tg_sincos(x.hi, (simd_float8 *)sinp+1, (simd_float8 *)cosp+1); +} + +extern void _simd_sincos_d2(simd_double2 x, simd_double2 *sinp, simd_double2 *cosp); +static inline SIMD_NONCONST void __tg_sincos(simd_double2 x, simd_double2 *sinp, simd_double2 *cosp) { + return _simd_sincos_d2(x, sinp, cosp); +} + +static inline SIMD_NONCONST void __tg_sincos(simd_double3 x, simd_double3 *sinp, simd_double3 *cosp) { + simd_double4 sin_val; + simd_double4 cos_val; + __tg_sincos(simd_make_double4(x), &sin_val, &cos_val); + *sinp = simd_make_double3(sin_val); + *cosp = simd_make_double3(cos_val); +} + +static inline SIMD_NONCONST void __tg_sincos(simd_double4 x, simd_double4 *sinp, simd_double4 *cosp) { + __tg_sincos(x.lo, (simd_double2 *)sinp+0, (simd_double2 *)cosp+0); + __tg_sincos(x.hi, (simd_double2 *)sinp+1, (simd_double2 *)cosp+1); +} + +static inline SIMD_NONCONST void __tg_sincos(simd_double8 x, simd_double8 *sinp, simd_double8 *cosp) { + __tg_sincos(x.lo, (simd_double4 *)sinp+0, (simd_double4 *)cosp+0); + __tg_sincos(x.hi, (simd_double4 *)sinp+1, (simd_double4 *)cosp+1); +} + +#pragma mark - sincospi implementation +static inline SIMD_NONCONST void __tg_sincospi(simd_float2 x, simd_float2 *sinp, simd_float2 *cosp) { + simd_float4 sin_val; + simd_float4 cos_val; + __tg_sincospi(simd_make_float4(x), &sin_val, &cos_val); + *sinp = simd_make_float2(sin_val); + *cosp = simd_make_float2(cos_val); +} + +static inline SIMD_NONCONST void __tg_sincospi(simd_float3 x, simd_float3 *sinp, simd_float3 *cosp) { + simd_float4 sin_val; + simd_float4 cos_val; + __tg_sincospi(simd_make_float4(x), &sin_val, &cos_val); + *sinp = simd_make_float3(sin_val); + *cosp = simd_make_float3(cos_val); +} + +extern void _simd_sincospi_f4(simd_float4 x, simd_float4 *sinp, simd_float4 *cosp); +static inline SIMD_NONCONST void __tg_sincospi(simd_float4 x, simd_float4 *sinp, simd_float4 *cosp) { + return _simd_sincospi_f4(x, sinp, cosp); +} + +static inline SIMD_NONCONST void __tg_sincospi(simd_float8 x, simd_float8 *sinp, simd_float8 *cosp) { + __tg_sincospi(x.lo, (simd_float4 *)sinp+0, (simd_float4 *)cosp+0); + __tg_sincospi(x.hi, (simd_float4 *)sinp+1, (simd_float4 *)cosp+1); +} + +static inline SIMD_NONCONST void __tg_sincospi(simd_float16 x, simd_float16 *sinp, simd_float16 *cosp) { + __tg_sincospi(x.lo, (simd_float8 *)sinp+0, (simd_float8 *)cosp+0); + __tg_sincospi(x.hi, (simd_float8 *)sinp+1, (simd_float8 *)cosp+1); +} + +extern void _simd_sincospi_d2(simd_double2 x, simd_double2 *sinp, simd_double2 *cosp); +static inline SIMD_NONCONST void __tg_sincospi(simd_double2 x, simd_double2 *sinp, simd_double2 *cosp) { + return _simd_sincospi_d2(x, sinp, cosp); +} + +static inline SIMD_NONCONST void __tg_sincospi(simd_double3 x, simd_double3 *sinp, simd_double3 *cosp) { + simd_double4 sin_val; + simd_double4 cos_val; + __tg_sincospi(simd_make_double4(x), &sin_val, &cos_val); + *sinp = simd_make_double3(sin_val); + *cosp = simd_make_double3(cos_val); +} + +static inline SIMD_NONCONST void __tg_sincospi(simd_double4 x, simd_double4 *sinp, simd_double4 *cosp) { + __tg_sincospi(x.lo, (simd_double2 *)sinp+0, (simd_double2 *)cosp+0); + __tg_sincospi(x.hi, (simd_double2 *)sinp+1, (simd_double2 *)cosp+1); +} + +static inline SIMD_NONCONST void __tg_sincospi(simd_double8 x, simd_double8 *sinp, simd_double8 *cosp) { + __tg_sincospi(x.lo, 
(simd_double4 *)sinp+0, (simd_double4 *)cosp+0);
+  __tg_sincospi(x.hi, (simd_double4 *)sinp+1, (simd_double4 *)cosp+1);
+}
+
+#endif // SIMD_LIBRARY_VERSION >= 5
+#pragma mark - lgamma implementation
+static inline SIMD_CFUNC simd_float2 __tg_lgamma(simd_float2 x) {
+  return simd_make_float2(__tg_lgamma(simd_make_float4(x)));
+}
+
+static inline SIMD_CFUNC simd_float3 __tg_lgamma(simd_float3 x) {
+  return simd_make_float3(__tg_lgamma(simd_make_float4(x)));
+}
+
+#if SIMD_LIBRARY_VERSION >= 4
+extern simd_float4 _simd_lgamma_f4(simd_float4 x);
+static inline SIMD_CFUNC simd_float4 __tg_lgamma(simd_float4 x) {
+  return _simd_lgamma_f4(x);
+}
+#else
+static inline SIMD_CFUNC simd_float4 __tg_lgamma(simd_float4 x) {
+  return simd_make_float4(lgamma(x.x), lgamma(x.y), lgamma(x.z), lgamma(x.w));
+}
+#endif
+
+#if SIMD_LIBRARY_VERSION >= 4 && defined __x86_64__ && defined __AVX2__
+extern simd_float8 _simd_lgamma_f8(simd_float8 x);
+static inline SIMD_CFUNC simd_float8 __tg_lgamma(simd_float8 x) {
+  return _simd_lgamma_f8(x);
+}
+#else
+static inline SIMD_CFUNC simd_float8 __tg_lgamma(simd_float8 x) {
+  return simd_make_float8(__tg_lgamma(x.lo), __tg_lgamma(x.hi));
+}
+#endif
+
+#if SIMD_LIBRARY_VERSION >= 4 && defined __x86_64__ && defined __AVX512F__
+extern simd_float16 _simd_lgamma_f16(simd_float16 x);
+static inline SIMD_CFUNC simd_float16 __tg_lgamma(simd_float16 x) {
+  return _simd_lgamma_f16(x);
+}
+#else
+static inline SIMD_CFUNC simd_float16 __tg_lgamma(simd_float16 x) {
+  return simd_make_float16(__tg_lgamma(x.lo), __tg_lgamma(x.hi));
+}
+#endif
+
+#if SIMD_LIBRARY_VERSION >= 4
+extern simd_double2 _simd_lgamma_d2(simd_double2 x);
+static inline SIMD_CFUNC simd_double2 __tg_lgamma(simd_double2 x) {
+  return _simd_lgamma_d2(x);
+}
+#else
+static inline SIMD_CFUNC simd_double2 __tg_lgamma(simd_double2 x) {
+  return simd_make_double2(lgamma(x.x), lgamma(x.y));
+}
+#endif
+
+static inline SIMD_CFUNC simd_double3 __tg_lgamma(simd_double3 x) {
+  return simd_make_double3(__tg_lgamma(simd_make_double4(x)));
+}
+
+#if SIMD_LIBRARY_VERSION >= 4 && defined __x86_64__ && defined __AVX2__
+extern simd_double4 _simd_lgamma_d4(simd_double4 x);
+static inline SIMD_CFUNC simd_double4 __tg_lgamma(simd_double4 x) {
+  return _simd_lgamma_d4(x);
+}
+#else
+static inline SIMD_CFUNC simd_double4 __tg_lgamma(simd_double4 x) {
+  return simd_make_double4(__tg_lgamma(x.lo), __tg_lgamma(x.hi));
+}
+#endif
+
+#if SIMD_LIBRARY_VERSION >= 4 && defined __x86_64__ && defined __AVX512F__
+extern simd_double8 _simd_lgamma_d8(simd_double8 x);
+static inline SIMD_CFUNC simd_double8 __tg_lgamma(simd_double8 x) {
+  return _simd_lgamma_d8(x);
+}
+#else
+static inline SIMD_CFUNC simd_double8 __tg_lgamma(simd_double8 x) {
+  return simd_make_double8(__tg_lgamma(x.lo), __tg_lgamma(x.hi));
+}
+#endif
+
+static inline SIMD_CFUNC simd_float2 __tg_fdim(simd_float2 x, simd_float2 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_float3 __tg_fdim(simd_float3 x, simd_float3 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_float4 __tg_fdim(simd_float4 x, simd_float4 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_float8 __tg_fdim(simd_float8 x, simd_float8 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_float16 __tg_fdim(simd_float16 x, simd_float16 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_double2 __tg_fdim(simd_double2 x, simd_double2 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_double3 __tg_fdim(simd_double3 x, simd_double3 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_double4 __tg_fdim(simd_double4 x, simd_double4 y) { return simd_bitselect(x-y, 0, x<y); }
+static inline SIMD_CFUNC simd_double8 __tg_fdim(simd_double8 x, simd_double8 y) { return simd_bitselect(x-y, 0, x<y); }
+
+#pragma mark - fma implementation
+#if SIMD_LIBRARY_VERSION >= 3
+extern simd_float4 _simd_fma_f4(simd_float4 x, simd_float4 y, simd_float4 z);
+#endif
+static inline SIMD_CFUNC simd_float4 __tg_fma(simd_float4 x, simd_float4 y, simd_float4 z) {
+#if defined __arm64__ || defined __ARM_VFPV4__
+  return vfmaq_f32(z, x, y);
+#elif (defined __i386__ || defined __x86_64__) && defined __FMA__
+  return _mm_fmadd_ps(x, y, z);
+#elif SIMD_LIBRARY_VERSION >= 3
+  return _simd_fma_f4(x, y, z);
+#else
+  return simd_make_float4(fma(x.x, y.x, z.x), fma(x.y, y.y, z.y), fma(x.z, y.z, z.z), fma(x.w, y.w, z.w));
+#endif
+}
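+
+/* Editorial sketch, not part of the original header: the cascade above is
+ * the dispatch pattern used throughout this file. Each width either lowers
+ * to a hardware intrinsic (vfmaq_f32 on arm64, _mm_fmadd_ps when x86 FMA is
+ * available), calls a tuned library kernel (_simd_fma_f4) when
+ * SIMD_LIBRARY_VERSION >= 3, or falls back to the scalar libm function
+ * applied per lane. Illustrative values only:
+ *
+ *     simd_float4 x = {1, 2, 3, 4};
+ *     simd_float4 y = {2, 2, 2, 2};
+ *     simd_float4 z = {1, 1, 1, 1};
+ *     simd_float4 r = __tg_fma(x, y, z);  // {3, 5, 7, 9}, fused: one rounding per lane
+ */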
+
+static inline SIMD_CFUNC simd_float8 __tg_fma(simd_float8 x, simd_float8 y, simd_float8 z) {
+#if (defined __i386__ || defined __x86_64__) && defined __FMA__
+  return _mm256_fmadd_ps(x, y, z);
+#else
+  return simd_make_float8(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_float16 __tg_fma(simd_float16 x, simd_float16 y, simd_float16 z) {
+#if defined __x86_64__ && defined __AVX512F__
+  return _mm512_fmadd_ps(x, y, z);
+#else
+  return simd_make_float16(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi));
+#endif
+}
+
+#if SIMD_LIBRARY_VERSION >= 3
+extern simd_double2 _simd_fma_d2(simd_double2 x, simd_double2 y, simd_double2 z);
+#endif
+static inline SIMD_CFUNC simd_double2 __tg_fma(simd_double2 x, simd_double2 y, simd_double2 z) {
+#if defined __arm64__
+  return vfmaq_f64(z, x, y);
+#elif (defined __i386__ || defined __x86_64__) && defined __FMA__
+  return _mm_fmadd_pd(x, y, z);
+#elif SIMD_LIBRARY_VERSION >= 3
+  return _simd_fma_d2(x, y, z);
+#else
+  return simd_make_double2(fma(x.x, y.x, z.x), fma(x.y, y.y, z.y));
+#endif
+}
+
+static inline SIMD_CFUNC simd_double3 __tg_fma(simd_double3 x, simd_double3 y, simd_double3 z) {
+  return simd_make_double3(__tg_fma(simd_make_double4(x), simd_make_double4(y), simd_make_double4(z)));
+}
+
+static inline SIMD_CFUNC simd_double4 __tg_fma(simd_double4 x, simd_double4 y, simd_double4 z) {
+#if (defined __i386__ || defined __x86_64__) && defined __FMA__
+  return _mm256_fmadd_pd(x, y, z);
+#else
+  return simd_make_double4(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi));
+#endif
+}
+
+static inline SIMD_CFUNC simd_double8 __tg_fma(simd_double8 x, simd_double8 y, simd_double8 z) {
+#if defined __x86_64__ && defined __AVX512F__
+  return _mm512_fmadd_pd(x, y, z);
+#else
+  return simd_make_double8(__tg_fma(x.lo, y.lo, z.lo), __tg_fma(x.hi, y.hi, z.hi));
+#endif
+}
+
+static inline SIMD_CFUNC float simd_muladd(float x, float y, float z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_float2 simd_muladd(simd_float2 x, simd_float2 y, simd_float2 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_float3 simd_muladd(simd_float3 x, simd_float3 y, simd_float3 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_float4 simd_muladd(simd_float4 x, simd_float4 y, simd_float4 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_float8 simd_muladd(simd_float8 x, simd_float8 y, simd_float8 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_float16 simd_muladd(simd_float16 x, simd_float16 y, simd_float16 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC double simd_muladd(double x, double y, double z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_double2 simd_muladd(simd_double2 x, simd_double2 y, simd_double2 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_double3 simd_muladd(simd_double3 x, simd_double3 y, simd_double3 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_double4 simd_muladd(simd_double4 x, simd_double4 y, simd_double4 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+static inline SIMD_CFUNC simd_double8 simd_muladd(simd_double8 x, simd_double8 y, simd_double8 z) {
+#pragma STDC FP_CONTRACT ON
+  return x*y + z;
+}
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif /* SIMD_MATH_HEADER */
diff --git a/vfsoverlay/matrix.h b/vfsoverlay/matrix.h
new file mode 100644
index 00000000..bfc07b96
--- /dev/null
+++ b/vfsoverlay/matrix.h
@@ -0,0 +1,1990 @@
+/* Copyright (c) 2014-2017 Apple, Inc. All rights reserved.
+ *
+ *  Function                            Result
+ *  ------------------------------------------------------------------
+ *
+ *  simd_diagonal_matrix(x)             A square matrix with the vector x
+ *                                      as its diagonal.
+ *
+ *  simd_matrix(c0, c1, ... )           A matrix with the specified vectors
+ *                                      as columns.
+ *
+ *  simd_matrix_from_rows(r0, r1, ... ) A matrix with the specified vectors
+ *                                      as rows.
+ *
+ *  simd_mul(a,x)                       Scalar product a*x.
+ *
+ *  simd_linear_combination(a,x,b,y)    a*x + b*y.
+ *
+ *  simd_add(x,y)                       Macro wrapping linear_combination
+ *                                      to compute x + y.
+ *
+ *  simd_sub(x,y)                       Macro wrapping linear_combination
+ *                                      to compute x - y.
+ *
+ *  simd_transpose(x)                   Transpose of the matrix x.
+ *
+ *  simd_trace(x)                       Trace of the matrix x.
+ *
+ *  simd_determinant(x)                 Determinant of the matrix x.
+ *
+ *  simd_inverse(x)                     Inverse of x if x is non-singular. If
+ *                                      x is singular, the result is undefined.
+ *
+ *  simd_mul(x,y)                       If x is a matrix, returns the matrix
+ *                                      product x*y, where y is either a matrix
+ *                                      or a column vector. If x is a vector,
+ *                                      returns the product x*y where x is
+ *                                      interpreted as a row vector.
+ *
+ *  simd_equal(x,y)                     Returns true if and only if every
+ *                                      element of x is exactly equal to the
+ *                                      corresponding element of y.
+ *
+ *  simd_almost_equal_elements(x,y,tol)
+ *                                      Returns true if and only if for each
+ *                                      entry xij in x, the corresponding
+ *                                      element yij in y satisfies
+ *                                      |xij - yij| <= tol.
+ *
+ *  simd_almost_equal_elements_relative(x,y,tol)
+ *                                      Returns true if and only if for each
+ *                                      entry xij in x, the corresponding
+ *                                      element yij in y satisfies
+ *                                      |xij - yij| <= tol*|xij|.
+ *
+ *  The header also defines a few useful global matrix objects:
+ *  matrix_identity_floatNxM and matrix_identity_doubleNxM, which may be
+ *  used to get an identity matrix of the specified size.
+ *
+ *  In C++, we are able to use namespacing to make the functions more
+ *  concise; we also overload some common arithmetic operators to work with
+ *  the matrix types:
+ *
+ *  C++ Function                            Equivalent C Function
+ *  --------------------------------------------------------------------
+ *  simd::inverse                           simd_inverse
+ *  simd::transpose                         simd_transpose
+ *  operator+                               simd_add
+ *  operator-                               simd_sub
+ *  operator+=                              N/A
+ *  operator-=                              N/A
+ *  operator*                               simd_mul or simd_mul
+ *  operator*=                              simd_mul or simd_mul
+ *  operator==                              simd_equal
+ *  operator!=                              !simd_equal
+ *  simd::almost_equal_elements             simd_almost_equal_elements
+ *  simd::almost_equal_elements_relative    simd_almost_equal_elements_relative
+ *
+ *  <simd/simd.h> provides constructors for C++ matrix types.
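+ *
+ *  As an illustrative sketch (an editorial addition, not original header
+ *  text; simd_make_float2 is assumed to come from <simd/vector_make.h>),
+ *  the C interface composes as follows:
+ *
+ *      simd_float2x2 A = simd_matrix(simd_make_float2(1, 0),   // column 0
+ *                                    simd_make_float2(1, 1));  // column 1
+ *      simd_float2 v = simd_make_float2(2, 3);
+ *      simd_float2 w = simd_mul(A, v);      // A*v == {5, 3}
+ *      simd_float2x2 B = simd_inverse(A);   // well-defined: det(A) == 1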
+ */
+
+#ifndef SIMD_MATRIX_HEADER
+#define SIMD_MATRIX_HEADER
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+#include <simd/matrix_types.h>
+#include <simd/geometry.h>
+#include <simd/vector.h>
+#include <simd/logic.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+extern const simd_float2x2 matrix_identity_float2x2 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
+extern const simd_float3x3 matrix_identity_float3x3 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
+extern const simd_float4x4 matrix_identity_float4x4 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
+extern const simd_double2x2 matrix_identity_double2x2 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
+extern const simd_double3x3 matrix_identity_double3x3 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
+extern const simd_double4x4 matrix_identity_double4x4 __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0));
+
+static simd_float2x2 SIMD_CFUNC simd_diagonal_matrix(simd_float2 __x);
+static simd_float3x3 SIMD_CFUNC simd_diagonal_matrix(simd_float3 __x);
+static simd_float4x4 SIMD_CFUNC simd_diagonal_matrix(simd_float4 __x);
+static simd_double2x2 SIMD_CFUNC simd_diagonal_matrix(simd_double2 __x);
+static simd_double3x3 SIMD_CFUNC simd_diagonal_matrix(simd_double3 __x);
+static simd_double4x4 SIMD_CFUNC simd_diagonal_matrix(simd_double4 __x);
+#define matrix_from_diagonal simd_diagonal_matrix
+
+static simd_float2x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1);
+static simd_float3x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1, simd_float2 col2);
+static simd_float4x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1, simd_float2 col2, simd_float2 col3);
+static simd_float2x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1);
+static simd_float3x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1, simd_float3 col2);
+static simd_float4x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1, simd_float3 col2, simd_float3 col3);
+static simd_float2x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1);
+static simd_float3x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1, simd_float4 col2);
+static simd_float4x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1, simd_float4 col2, simd_float4 col3);
+static simd_double2x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1);
+static simd_double3x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2);
+static simd_double4x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2, simd_double2 col3);
+static simd_double2x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1);
+static simd_double3x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2);
+static simd_double4x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2, simd_double3 col3);
+static simd_double2x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1);
+static simd_double3x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2);
+static simd_double4x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2, simd_double4 col3);
+#define matrix_from_columns simd_matrix
+
+static simd_float2x2 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1);
+static simd_float2x3 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1, simd_float2 row2);
+static simd_float2x4 SIMD_CFUNC
simd_matrix_from_rows(simd_float2 row0, simd_float2 row1, simd_float2 row2, simd_float2 row3); +static simd_float3x2 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1); +static simd_float3x3 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1, simd_float3 row2); +static simd_float3x4 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1, simd_float3 row2, simd_float3 row3); +static simd_float4x2 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1); +static simd_float4x3 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1, simd_float4 row2); +static simd_float4x4 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1, simd_float4 row2, simd_float4 row3); +static simd_double2x2 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1); +static simd_double2x3 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2); +static simd_double2x4 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2, simd_double2 row3); +static simd_double3x2 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1); +static simd_double3x3 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2); +static simd_double3x4 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2, simd_double3 row3); +static simd_double4x2 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1); +static simd_double4x3 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2); +static simd_double4x4 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2, simd_double4 row3); +#define matrix_from_rows simd_matrix_from_rows + +static simd_float3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatf q); +static simd_float4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatf q); +static simd_double3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatd q); +static simd_double4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatd q); + +static simd_float2x2 SIMD_CFUNC simd_mul(float __a, simd_float2x2 __x); +static simd_float3x2 SIMD_CFUNC simd_mul(float __a, simd_float3x2 __x); +static simd_float4x2 SIMD_CFUNC simd_mul(float __a, simd_float4x2 __x); +static simd_float2x3 SIMD_CFUNC simd_mul(float __a, simd_float2x3 __x); +static simd_float3x3 SIMD_CFUNC simd_mul(float __a, simd_float3x3 __x); +static simd_float4x3 SIMD_CFUNC simd_mul(float __a, simd_float4x3 __x); +static simd_float2x4 SIMD_CFUNC simd_mul(float __a, simd_float2x4 __x); +static simd_float3x4 SIMD_CFUNC simd_mul(float __a, simd_float3x4 __x); +static simd_float4x4 SIMD_CFUNC simd_mul(float __a, simd_float4x4 __x); +static simd_double2x2 SIMD_CFUNC simd_mul(double __a, simd_double2x2 __x); +static simd_double3x2 SIMD_CFUNC simd_mul(double __a, simd_double3x2 __x); +static simd_double4x2 SIMD_CFUNC simd_mul(double __a, simd_double4x2 __x); +static simd_double2x3 SIMD_CFUNC simd_mul(double __a, simd_double2x3 __x); +static simd_double3x3 SIMD_CFUNC simd_mul(double __a, simd_double3x3 __x); +static simd_double4x3 SIMD_CFUNC simd_mul(double __a, simd_double4x3 __x); +static simd_double2x4 SIMD_CFUNC simd_mul(double __a, simd_double2x4 __x); +static simd_double3x4 SIMD_CFUNC simd_mul(double __a, simd_double3x4 __x); +static simd_double4x4 SIMD_CFUNC simd_mul(double __a, simd_double4x4 __x); + +static simd_float2x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x2 __x, float __b, simd_float2x2 
__y); +static simd_float3x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x2 __x, float __b, simd_float3x2 __y); +static simd_float4x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x2 __x, float __b, simd_float4x2 __y); +static simd_float2x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x3 __x, float __b, simd_float2x3 __y); +static simd_float3x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x3 __x, float __b, simd_float3x3 __y); +static simd_float4x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x3 __x, float __b, simd_float4x3 __y); +static simd_float2x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x4 __x, float __b, simd_float2x4 __y); +static simd_float3x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x4 __x, float __b, simd_float3x4 __y); +static simd_float4x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x4 __x, float __b, simd_float4x4 __y); +static simd_double2x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x2 __x, double __b, simd_double2x2 __y); +static simd_double3x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x2 __x, double __b, simd_double3x2 __y); +static simd_double4x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x2 __x, double __b, simd_double4x2 __y); +static simd_double2x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x3 __x, double __b, simd_double2x3 __y); +static simd_double3x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x3 __x, double __b, simd_double3x3 __y); +static simd_double4x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x3 __x, double __b, simd_double4x3 __y); +static simd_double2x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x4 __x, double __b, simd_double2x4 __y); +static simd_double3x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x4 __x, double __b, simd_double3x4 __y); +static simd_double4x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x4 __x, double __b, simd_double4x4 __y); +#define matrix_linear_combination simd_linear_combination + +static simd_float2x2 SIMD_CFUNC simd_add(simd_float2x2 __x, simd_float2x2 __y); +static simd_float3x2 SIMD_CFUNC simd_add(simd_float3x2 __x, simd_float3x2 __y); +static simd_float4x2 SIMD_CFUNC simd_add(simd_float4x2 __x, simd_float4x2 __y); +static simd_float2x3 SIMD_CFUNC simd_add(simd_float2x3 __x, simd_float2x3 __y); +static simd_float3x3 SIMD_CFUNC simd_add(simd_float3x3 __x, simd_float3x3 __y); +static simd_float4x3 SIMD_CFUNC simd_add(simd_float4x3 __x, simd_float4x3 __y); +static simd_float2x4 SIMD_CFUNC simd_add(simd_float2x4 __x, simd_float2x4 __y); +static simd_float3x4 SIMD_CFUNC simd_add(simd_float3x4 __x, simd_float3x4 __y); +static simd_float4x4 SIMD_CFUNC simd_add(simd_float4x4 __x, simd_float4x4 __y); +static simd_double2x2 SIMD_CFUNC simd_add(simd_double2x2 __x, simd_double2x2 __y); +static simd_double3x2 SIMD_CFUNC simd_add(simd_double3x2 __x, simd_double3x2 __y); +static simd_double4x2 SIMD_CFUNC simd_add(simd_double4x2 __x, simd_double4x2 __y); +static simd_double2x3 SIMD_CFUNC simd_add(simd_double2x3 __x, simd_double2x3 __y); +static simd_double3x3 SIMD_CFUNC simd_add(simd_double3x3 __x, simd_double3x3 __y); +static simd_double4x3 SIMD_CFUNC simd_add(simd_double4x3 __x, simd_double4x3 __y); +static simd_double2x4 SIMD_CFUNC simd_add(simd_double2x4 __x, simd_double2x4 __y); +static simd_double3x4 SIMD_CFUNC simd_add(simd_double3x4 __x, simd_double3x4 __y); +static simd_double4x4 SIMD_CFUNC 
simd_add(simd_double4x4 __x, simd_double4x4 __y); +#define matrix_add simd_add + +static simd_float2x2 SIMD_CFUNC simd_sub(simd_float2x2 __x, simd_float2x2 __y); +static simd_float3x2 SIMD_CFUNC simd_sub(simd_float3x2 __x, simd_float3x2 __y); +static simd_float4x2 SIMD_CFUNC simd_sub(simd_float4x2 __x, simd_float4x2 __y); +static simd_float2x3 SIMD_CFUNC simd_sub(simd_float2x3 __x, simd_float2x3 __y); +static simd_float3x3 SIMD_CFUNC simd_sub(simd_float3x3 __x, simd_float3x3 __y); +static simd_float4x3 SIMD_CFUNC simd_sub(simd_float4x3 __x, simd_float4x3 __y); +static simd_float2x4 SIMD_CFUNC simd_sub(simd_float2x4 __x, simd_float2x4 __y); +static simd_float3x4 SIMD_CFUNC simd_sub(simd_float3x4 __x, simd_float3x4 __y); +static simd_float4x4 SIMD_CFUNC simd_sub(simd_float4x4 __x, simd_float4x4 __y); +static simd_double2x2 SIMD_CFUNC simd_sub(simd_double2x2 __x, simd_double2x2 __y); +static simd_double3x2 SIMD_CFUNC simd_sub(simd_double3x2 __x, simd_double3x2 __y); +static simd_double4x2 SIMD_CFUNC simd_sub(simd_double4x2 __x, simd_double4x2 __y); +static simd_double2x3 SIMD_CFUNC simd_sub(simd_double2x3 __x, simd_double2x3 __y); +static simd_double3x3 SIMD_CFUNC simd_sub(simd_double3x3 __x, simd_double3x3 __y); +static simd_double4x3 SIMD_CFUNC simd_sub(simd_double4x3 __x, simd_double4x3 __y); +static simd_double2x4 SIMD_CFUNC simd_sub(simd_double2x4 __x, simd_double2x4 __y); +static simd_double3x4 SIMD_CFUNC simd_sub(simd_double3x4 __x, simd_double3x4 __y); +static simd_double4x4 SIMD_CFUNC simd_sub(simd_double4x4 __x, simd_double4x4 __y); +#define matrix_sub simd_sub + +static simd_float2x2 SIMD_CFUNC simd_transpose(simd_float2x2 __x); +static simd_float2x3 SIMD_CFUNC simd_transpose(simd_float3x2 __x); +static simd_float2x4 SIMD_CFUNC simd_transpose(simd_float4x2 __x); +static simd_float3x2 SIMD_CFUNC simd_transpose(simd_float2x3 __x); +static simd_float3x3 SIMD_CFUNC simd_transpose(simd_float3x3 __x); +static simd_float3x4 SIMD_CFUNC simd_transpose(simd_float4x3 __x); +static simd_float4x2 SIMD_CFUNC simd_transpose(simd_float2x4 __x); +static simd_float4x3 SIMD_CFUNC simd_transpose(simd_float3x4 __x); +static simd_float4x4 SIMD_CFUNC simd_transpose(simd_float4x4 __x); +static simd_double2x2 SIMD_CFUNC simd_transpose(simd_double2x2 __x); +static simd_double2x3 SIMD_CFUNC simd_transpose(simd_double3x2 __x); +static simd_double2x4 SIMD_CFUNC simd_transpose(simd_double4x2 __x); +static simd_double3x2 SIMD_CFUNC simd_transpose(simd_double2x3 __x); +static simd_double3x3 SIMD_CFUNC simd_transpose(simd_double3x3 __x); +static simd_double3x4 SIMD_CFUNC simd_transpose(simd_double4x3 __x); +static simd_double4x2 SIMD_CFUNC simd_transpose(simd_double2x4 __x); +static simd_double4x3 SIMD_CFUNC simd_transpose(simd_double3x4 __x); +static simd_double4x4 SIMD_CFUNC simd_transpose(simd_double4x4 __x); +#define matrix_transpose simd_transpose + +static float SIMD_CFUNC simd_trace(simd_float2x2 __x); +static float SIMD_CFUNC simd_trace(simd_float3x3 __x); +static float SIMD_CFUNC simd_trace(simd_float4x4 __x); +static double SIMD_CFUNC simd_trace(simd_double2x2 __x); +static double SIMD_CFUNC simd_trace(simd_double3x3 __x); +static double SIMD_CFUNC simd_trace(simd_double4x4 __x); +#define matrix_trace simd_trace + +static float SIMD_CFUNC simd_determinant(simd_float2x2 __x); +static float SIMD_CFUNC simd_determinant(simd_float3x3 __x); +static float SIMD_CFUNC simd_determinant(simd_float4x4 __x); +static double SIMD_CFUNC simd_determinant(simd_double2x2 __x); +static double SIMD_CFUNC 
simd_determinant(simd_double3x3 __x); +static double SIMD_CFUNC simd_determinant(simd_double4x4 __x); +#define matrix_determinant simd_determinant + +static simd_float2x2 SIMD_CFUNC simd_inverse(simd_float2x2 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)); +static simd_float3x3 SIMD_CFUNC simd_inverse(simd_float3x3 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)); +static simd_float4x4 SIMD_CFUNC simd_inverse(simd_float4x4 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)); +static simd_double2x2 SIMD_CFUNC simd_inverse(simd_double2x2 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)); +static simd_double3x3 SIMD_CFUNC simd_inverse(simd_double3x3 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)); +static simd_double4x4 SIMD_CFUNC simd_inverse(simd_double4x4 __x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)); +#define matrix_invert simd_inverse + +static simd_float2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float2 __y); +static simd_float2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float3 __y); +static simd_float2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float4 __y); +static simd_float3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float2 __y); +static simd_float3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float3 __y); +static simd_float3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float4 __y); +static simd_float4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float2 __y); +static simd_float4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float3 __y); +static simd_float4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float4 __y); +static simd_double2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2 __y); +static simd_double2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3 __y); +static simd_double2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4 __y); +static simd_double3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2 __y); +static simd_double3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3 __y); +static simd_double3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4 __y); +static simd_double4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2 __y); +static simd_double4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3 __y); +static simd_double4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4 __y); +static simd_float2 SIMD_CFUNC simd_mul(simd_float2 __x, simd_float2x2 __y); +static simd_float3 SIMD_CFUNC simd_mul(simd_float2 __x, simd_float3x2 __y); +static simd_float4 SIMD_CFUNC simd_mul(simd_float2 __x, simd_float4x2 __y); +static simd_float2 SIMD_CFUNC simd_mul(simd_float3 __x, simd_float2x3 __y); +static simd_float3 SIMD_CFUNC simd_mul(simd_float3 __x, simd_float3x3 __y); +static simd_float4 SIMD_CFUNC simd_mul(simd_float3 __x, simd_float4x3 __y); +static simd_float2 SIMD_CFUNC simd_mul(simd_float4 __x, simd_float2x4 __y); +static simd_float3 SIMD_CFUNC simd_mul(simd_float4 __x, simd_float3x4 __y); +static simd_float4 SIMD_CFUNC simd_mul(simd_float4 __x, simd_float4x4 __y); +static simd_double2 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double2x2 __y); +static simd_double3 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double3x2 __y); +static simd_double4 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double4x2 __y); +static simd_double2 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double2x3 __y); +static simd_double3 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double3x3 __y); +static simd_double4 SIMD_CFUNC simd_mul(simd_double3 __x, 
simd_double4x3 __y); +static simd_double2 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double2x4 __y); +static simd_double3 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double3x4 __y); +static simd_double4 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double4x4 __y); +static simd_float2x2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float2x2 __y); +static simd_float3x2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float3x2 __y); +static simd_float4x2 SIMD_CFUNC simd_mul(simd_float2x2 __x, simd_float4x2 __y); +static simd_float2x3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float2x2 __y); +static simd_float3x3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float3x2 __y); +static simd_float4x3 SIMD_CFUNC simd_mul(simd_float2x3 __x, simd_float4x2 __y); +static simd_float2x4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float2x2 __y); +static simd_float3x4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float3x2 __y); +static simd_float4x4 SIMD_CFUNC simd_mul(simd_float2x4 __x, simd_float4x2 __y); +static simd_double2x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2x2 __y); +static simd_double3x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double3x2 __y); +static simd_double4x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double4x2 __y); +static simd_double2x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2x2 __y); +static simd_double3x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double3x2 __y); +static simd_double4x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double4x2 __y); +static simd_double2x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2x2 __y); +static simd_double3x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double3x2 __y); +static simd_double4x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double4x2 __y); +static simd_float2x2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float2x3 __y); +static simd_float3x2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float3x3 __y); +static simd_float4x2 SIMD_CFUNC simd_mul(simd_float3x2 __x, simd_float4x3 __y); +static simd_float2x3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float2x3 __y); +static simd_float3x3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float3x3 __y); +static simd_float4x3 SIMD_CFUNC simd_mul(simd_float3x3 __x, simd_float4x3 __y); +static simd_float2x4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float2x3 __y); +static simd_float3x4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float3x3 __y); +static simd_float4x4 SIMD_CFUNC simd_mul(simd_float3x4 __x, simd_float4x3 __y); +static simd_double2x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double2x3 __y); +static simd_double3x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3x3 __y); +static simd_double4x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double4x3 __y); +static simd_double2x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double2x3 __y); +static simd_double3x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3x3 __y); +static simd_double4x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double4x3 __y); +static simd_double2x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double2x3 __y); +static simd_double3x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3x3 __y); +static simd_double4x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double4x3 __y); +static simd_float2x2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float2x4 __y); +static simd_float3x2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float3x4 __y); +static simd_float4x2 SIMD_CFUNC simd_mul(simd_float4x2 __x, simd_float4x4 __y); +static simd_float2x3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float2x4 __y); 
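/* Editor's note -- an illustrative sketch, not part of the vendored
 * <simd/matrix.h> header. The simd_mul overloads in this group follow the
 * library's column-major convention: simd_mul(m, v) treats v as a column
 * vector, simd_mul(v, m) treats it as a row vector, and the matrix-matrix
 * overloads require the inner dimensions to agree. A minimal usage example,
 * assuming clang with <simd/simd.h> on the include path (simd_matrix is the
 * column-wise constructor implemented later in this header):
 *
 *     #include <simd/simd.h>
 *
 *     // 90-degree counter-clockwise rotation, given by its two columns.
 *     simd_float2x2 rot90 = simd_matrix((simd_float2){ 0, 1},
 *                                       (simd_float2){-1, 0});
 *     simd_float2 v = {1, 0};
 *     simd_float2 col = simd_mul(rot90, v);         // column vector: {0, 1}
 *     simd_float2 row = simd_mul(v, rot90);         // row vector:    {0, -1}
 *     simd_float2x2 r180 = simd_mul(rot90, rot90);  // matrix product
 */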
+static simd_float3x3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float3x4 __y); +static simd_float4x3 SIMD_CFUNC simd_mul(simd_float4x3 __x, simd_float4x4 __y); +static simd_float2x4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float2x4 __y); +static simd_float3x4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float3x4 __y); +static simd_float4x4 SIMD_CFUNC simd_mul(simd_float4x4 __x, simd_float4x4 __y); +static simd_double2x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double2x4 __y); +static simd_double3x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double3x4 __y); +static simd_double4x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4x4 __y); +static simd_double2x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double2x4 __y); +static simd_double3x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double3x4 __y); +static simd_double4x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4x4 __y); +static simd_double2x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double2x4 __y); +static simd_double3x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double3x4 __y); +static simd_double4x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4x4 __y); + +static simd_bool SIMD_CFUNC simd_equal(simd_float2x2 __x, simd_float2x2 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float2x3 __x, simd_float2x3 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float2x4 __x, simd_float2x4 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float3x2 __x, simd_float3x2 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float3x3 __x, simd_float3x3 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float3x4 __x, simd_float3x4 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float4x2 __x, simd_float4x2 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float4x3 __x, simd_float4x3 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_float4x4 __x, simd_float4x4 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double2x2 __x, simd_double2x2 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double2x3 __x, simd_double2x3 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double2x4 __x, simd_double2x4 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double3x2 __x, simd_double3x2 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double3x3 __x, simd_double3x3 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double3x4 __x, simd_double3x4 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double4x2 __x, simd_double4x2 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double4x3 __x, simd_double4x3 __y); +static simd_bool SIMD_CFUNC simd_equal(simd_double4x4 __x, simd_double4x4 __y); +#define matrix_equal simd_equal + +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x2 __x, simd_float2x2 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x3 __x, simd_float2x3 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x4 __x, simd_float2x4 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x2 __x, simd_float3x2 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x3 __x, simd_float3x3 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x4 __x, simd_float3x4 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x2 __x, simd_float4x2 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x3 __x, simd_float4x3 __y, float __tol); +static simd_bool SIMD_CFUNC 
simd_almost_equal_elements(simd_float4x4 __x, simd_float4x4 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x2 __x, simd_double2x2 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x3 __x, simd_double2x3 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x4 __x, simd_double2x4 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x2 __x, simd_double3x2 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x3 __x, simd_double3x3 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x4 __x, simd_double3x4 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x2 __x, simd_double4x2 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x3 __x, simd_double4x3 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x4 __x, simd_double4x4 __y, double __tol); +#define matrix_almost_equal_elements simd_almost_equal_elements + +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x2 __x, simd_float2x2 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x3 __x, simd_float2x3 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x4 __x, simd_float2x4 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x2 __x, simd_float3x2 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x3 __x, simd_float3x3 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x4 __x, simd_float3x4 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x2 __x, simd_float4x2 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x3 __x, simd_float4x3 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x4 __x, simd_float4x4 __y, float __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x2 __x, simd_double2x2 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x3 __x, simd_double2x3 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x4 __x, simd_double2x4 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x2 __x, simd_double3x2 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x3 __x, simd_double3x3 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x4 __x, simd_double3x4 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x2 __x, simd_double4x2 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x3 __x, simd_double4x3 __y, double __tol); +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x4 __x, simd_double4x4 __y, double __tol); +#define matrix_almost_equal_elements_relative simd_almost_equal_elements_relative + +#ifdef __cplusplus +} /* extern "C" */ + +namespace simd { + static SIMD_CPPFUNC float2x2 operator+(const float2x2 x, const float2x2 y) { return float2x2(::simd_linear_combination(1, x, 1, 
y)); } + static SIMD_CPPFUNC float2x3 operator+(const float2x3 x, const float2x3 y) { return float2x3(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float2x4 operator+(const float2x4 x, const float2x4 y) { return float2x4(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float3x2 operator+(const float3x2 x, const float3x2 y) { return float3x2(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float3x3 operator+(const float3x3 x, const float3x3 y) { return float3x3(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float3x4 operator+(const float3x4 x, const float3x4 y) { return float3x4(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float4x2 operator+(const float4x2 x, const float4x2 y) { return float4x2(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float4x3 operator+(const float4x3 x, const float4x3 y) { return float4x3(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC float4x4 operator+(const float4x4 x, const float4x4 y) { return float4x4(::simd_linear_combination(1, x, 1, y)); } + + static SIMD_CPPFUNC float2x2 operator-(const float2x2 x, const float2x2 y) { return float2x2(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float2x3 operator-(const float2x3 x, const float2x3 y) { return float2x3(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float2x4 operator-(const float2x4 x, const float2x4 y) { return float2x4(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float3x2 operator-(const float3x2 x, const float3x2 y) { return float3x2(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float3x3 operator-(const float3x3 x, const float3x3 y) { return float3x3(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float3x4 operator-(const float3x4 x, const float3x4 y) { return float3x4(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float4x2 operator-(const float4x2 x, const float4x2 y) { return float4x2(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float4x3 operator-(const float4x3 x, const float4x3 y) { return float4x3(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC float4x4 operator-(const float4x4 x, const float4x4 y) { return float4x4(::simd_linear_combination(1, x, -1, y)); } + + static SIMD_INLINE SIMD_NODEBUG float2x2& operator+=(float2x2& x, const float2x2 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float2x3& operator+=(float2x3& x, const float2x3 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float2x4& operator+=(float2x4& x, const float2x4 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float3x2& operator+=(float3x2& x, const float3x2 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float3x3& operator+=(float3x3& x, const float3x3 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float3x4& operator+=(float3x4& x, const float3x4 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float4x2& operator+=(float4x2& x, const float4x2 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float4x3& operator+=(float4x3& x, const float4x3 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG float4x4& operator+=(float4x4& x, const float4x4 y) { x = x + y; return x; } + + static SIMD_INLINE SIMD_NODEBUG float2x2& operator-=(float2x2& x, const float2x2 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float2x3& operator-=(float2x3& x, const float2x3 y) { x = x 
- y; return x; } + static SIMD_INLINE SIMD_NODEBUG float2x4& operator-=(float2x4& x, const float2x4 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float3x2& operator-=(float3x2& x, const float3x2 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float3x3& operator-=(float3x3& x, const float3x3 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float3x4& operator-=(float3x4& x, const float3x4 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float4x2& operator-=(float4x2& x, const float4x2 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float4x3& operator-=(float4x3& x, const float4x3 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG float4x4& operator-=(float4x4& x, const float4x4 y) { x = x - y; return x; } + + static SIMD_CPPFUNC float2x2 transpose(const float2x2 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float2x3 transpose(const float3x2 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float2x4 transpose(const float4x2 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float3x2 transpose(const float2x3 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float3x3 transpose(const float3x3 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float3x4 transpose(const float4x3 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float4x2 transpose(const float2x4 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float4x3 transpose(const float3x4 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC float4x4 transpose(const float4x4 x) { return ::simd_transpose(x); } + + static SIMD_CPPFUNC float trace(const float2x2 x) { return ::simd_trace(x); } + static SIMD_CPPFUNC float trace(const float3x3 x) { return ::simd_trace(x); } + static SIMD_CPPFUNC float trace(const float4x4 x) { return ::simd_trace(x); } + + static SIMD_CPPFUNC float determinant(const float2x2 x) { return ::simd_determinant(x); } + static SIMD_CPPFUNC float determinant(const float3x3 x) { return ::simd_determinant(x); } + static SIMD_CPPFUNC float determinant(const float4x4 x) { return ::simd_determinant(x); } + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wgcc-compat" + static SIMD_CPPFUNC float2x2 inverse(const float2x2 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); } + static SIMD_CPPFUNC float3x3 inverse(const float3x3 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); } + static SIMD_CPPFUNC float4x4 inverse(const float4x4 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); } +#pragma clang diagnostic pop + + static SIMD_CPPFUNC float2x2 operator*(const float a, const float2x2 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float2x3 operator*(const float a, const float2x3 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float2x4 operator*(const float a, const float2x4 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float3x2 operator*(const float a, const float3x2 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float3x3 operator*(const float a, const float3x3 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float3x4 operator*(const float a, const float3x4 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float4x2 operator*(const float a, const float4x2 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float4x3 operator*(const float a, const float4x3 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC 
float4x4 operator*(const float a, const float4x4 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float2x2 operator*(const float2x2 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float2x3 operator*(const float2x3 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float2x4 operator*(const float2x4 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float3x2 operator*(const float3x2 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float3x3 operator*(const float3x3 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float3x4 operator*(const float3x4 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float4x2 operator*(const float4x2 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float4x3 operator*(const float4x3 x, const float a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC float4x4 operator*(const float4x4 x, const float a) { return ::simd_mul(a, x); } + static SIMD_INLINE SIMD_NODEBUG float2x2& operator*=(float2x2& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float2x3& operator*=(float2x3& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float2x4& operator*=(float2x4& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float3x2& operator*=(float3x2& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float3x3& operator*=(float3x3& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float3x4& operator*=(float3x4& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float4x2& operator*=(float4x2& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float4x3& operator*=(float4x3& x, const float a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG float4x4& operator*=(float4x4& x, const float a) { x = ::simd_mul(a, x); return x; } + + static SIMD_CPPFUNC float2 operator*(const float2 x, const float2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3 operator*(const float2 x, const float3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4 operator*(const float2 x, const float4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2 operator*(const float3 x, const float2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3 operator*(const float3 x, const float3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4 operator*(const float3 x, const float4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2 operator*(const float4 x, const float2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3 operator*(const float4 x, const float3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4 operator*(const float4 x, const float4x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2 operator*(const float2x2 x, const float2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2 operator*(const float3x2 x, const float3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2 operator*(const float4x2 x, const float4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3 operator*(const float2x3 x, const float2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3 operator*(const float3x3 x, const float3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3 operator*(const 
float4x3 x, const float4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4 operator*(const float2x4 x, const float2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4 operator*(const float3x4 x, const float3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4 operator*(const float4x4 x, const float4 y) { return ::simd_mul(x, y); } + static SIMD_INLINE SIMD_NODEBUG float2& operator*=(float2& x, const float2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float3& operator*=(float3& x, const float3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float4& operator*=(float4& x, const float4x4 y) { x = ::simd_mul(x, y); return x; } + + static SIMD_CPPFUNC float2x2 operator*(const float2x2 x, const float2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x2 operator*(const float2x2 x, const float3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x2 operator*(const float2x2 x, const float4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x3 operator*(const float2x3 x, const float2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x3 operator*(const float2x3 x, const float3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x3 operator*(const float2x3 x, const float4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x4 operator*(const float2x4 x, const float2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x4 operator*(const float2x4 x, const float3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x4 operator*(const float2x4 x, const float4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x2 operator*(const float3x2 x, const float2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x2 operator*(const float3x2 x, const float3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x2 operator*(const float3x2 x, const float4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x3 operator*(const float3x3 x, const float2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x3 operator*(const float3x3 x, const float3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x3 operator*(const float3x3 x, const float4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x4 operator*(const float3x4 x, const float2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x4 operator*(const float3x4 x, const float3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x4 operator*(const float3x4 x, const float4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x2 operator*(const float4x2 x, const float2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x2 operator*(const float4x2 x, const float3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x2 operator*(const float4x2 x, const float4x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x3 operator*(const float4x3 x, const float2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x3 operator*(const float4x3 x, const float3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x3 operator*(const float4x3 x, const float4x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float2x4 operator*(const float4x4 x, const float2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float3x4 operator*(const float4x4 x, const float3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC float4x4 operator*(const float4x4 x, const float4x4 y) { 
return ::simd_mul(x, y); } + static SIMD_INLINE SIMD_NODEBUG float2x2& operator*=(float2x2& x, const float2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float2x3& operator*=(float2x3& x, const float2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float2x4& operator*=(float2x4& x, const float2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float3x2& operator*=(float3x2& x, const float3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float3x3& operator*=(float3x3& x, const float3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float3x4& operator*=(float3x4& x, const float3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float4x2& operator*=(float4x2& x, const float4x4 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float4x3& operator*=(float4x3& x, const float4x4 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG float4x4& operator*=(float4x4& x, const float4x4 y) { x = ::simd_mul(x, y); return x; } + + static SIMD_CPPFUNC bool operator==(const float2x2& x, const float2x2& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float2x3& x, const float2x3& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float2x4& x, const float2x4& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float3x2& x, const float3x2& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float3x3& x, const float3x3& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float3x4& x, const float3x4& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float4x2& x, const float4x2& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float4x3& x, const float4x3& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const float4x4& x, const float4x4& y) { return ::simd_equal(x, y); } + + static SIMD_CPPFUNC bool operator!=(const float2x2& x, const float2x2& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float2x3& x, const float2x3& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float2x4& x, const float2x4& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float3x2& x, const float3x2& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float3x3& x, const float3x3& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float3x4& x, const float3x4& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float4x2& x, const float4x2& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float4x3& x, const float4x3& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const float4x4& x, const float4x4& y) { return !(x == y); } + + static SIMD_CPPFUNC bool almost_equal_elements(const float2x2 x, const float2x2 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float2x3 x, const float2x3 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float2x4 x, const float2x4 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float3x2 x, const float3x2 y, const float tol) { return 
::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float3x3 x, const float3x3 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float3x4 x, const float3x4 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float4x2 x, const float4x2 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float4x3 x, const float4x3 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const float4x4 x, const float4x4 y, const float tol) { return ::simd_almost_equal_elements(x, y, tol); } + + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float2x2 x, const float2x2 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float2x3 x, const float2x3 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float2x4 x, const float2x4 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float3x2 x, const float3x2 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float3x3 x, const float3x3 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float3x4 x, const float3x4 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float4x2 x, const float4x2 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float4x3 x, const float4x3 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const float4x4 x, const float4x4 y, const float tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + + static SIMD_CPPFUNC double2x2 operator+(const double2x2 x, const double2x2 y) { return double2x2(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double2x3 operator+(const double2x3 x, const double2x3 y) { return double2x3(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double2x4 operator+(const double2x4 x, const double2x4 y) { return double2x4(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double3x2 operator+(const double3x2 x, const double3x2 y) { return double3x2(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double3x3 operator+(const double3x3 x, const double3x3 y) { return double3x3(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double3x4 operator+(const double3x4 x, const double3x4 y) { return double3x4(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double4x2 operator+(const double4x2 x, const double4x2 y) { return double4x2(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double4x3 operator+(const double4x3 x, const double4x3 y) { return double4x3(::simd_linear_combination(1, x, 1, y)); } + static SIMD_CPPFUNC double4x4 operator+(const double4x4 x, 
const double4x4 y) { return double4x4(::simd_linear_combination(1, x, 1, y)); } + + static SIMD_CPPFUNC double2x2 operator-(const double2x2 x, const double2x2 y) { return double2x2(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double2x3 operator-(const double2x3 x, const double2x3 y) { return double2x3(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double2x4 operator-(const double2x4 x, const double2x4 y) { return double2x4(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double3x2 operator-(const double3x2 x, const double3x2 y) { return double3x2(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double3x3 operator-(const double3x3 x, const double3x3 y) { return double3x3(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double3x4 operator-(const double3x4 x, const double3x4 y) { return double3x4(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double4x2 operator-(const double4x2 x, const double4x2 y) { return double4x2(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double4x3 operator-(const double4x3 x, const double4x3 y) { return double4x3(::simd_linear_combination(1, x, -1, y)); } + static SIMD_CPPFUNC double4x4 operator-(const double4x4 x, const double4x4 y) { return double4x4(::simd_linear_combination(1, x, -1, y)); } + + static SIMD_INLINE SIMD_NODEBUG double2x2& operator+=(double2x2& x, const double2x2 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double2x3& operator+=(double2x3& x, const double2x3 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double2x4& operator+=(double2x4& x, const double2x4 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double3x2& operator+=(double3x2& x, const double3x2 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double3x3& operator+=(double3x3& x, const double3x3 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double3x4& operator+=(double3x4& x, const double3x4 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double4x2& operator+=(double4x2& x, const double4x2 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double4x3& operator+=(double4x3& x, const double4x3 y) { x = x + y; return x; } + static SIMD_INLINE SIMD_NODEBUG double4x4& operator+=(double4x4& x, const double4x4 y) { x = x + y; return x; } + + static SIMD_INLINE SIMD_NODEBUG double2x2& operator-=(double2x2& x, const double2x2 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double2x3& operator-=(double2x3& x, const double2x3 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double2x4& operator-=(double2x4& x, const double2x4 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double3x2& operator-=(double3x2& x, const double3x2 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double3x3& operator-=(double3x3& x, const double3x3 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double3x4& operator-=(double3x4& x, const double3x4 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double4x2& operator-=(double4x2& x, const double4x2 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double4x3& operator-=(double4x3& x, const double4x3 y) { x = x - y; return x; } + static SIMD_INLINE SIMD_NODEBUG double4x4& operator-=(double4x4& x, const double4x4 y) { x = x - y; return x; } + + static SIMD_CPPFUNC double2x2 transpose(const double2x2 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double2x3 
transpose(const double3x2 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double2x4 transpose(const double4x2 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double3x2 transpose(const double2x3 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double3x3 transpose(const double3x3 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double3x4 transpose(const double4x3 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double4x2 transpose(const double2x4 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double4x3 transpose(const double3x4 x) { return ::simd_transpose(x); } + static SIMD_CPPFUNC double4x4 transpose(const double4x4 x) { return ::simd_transpose(x); } + + static SIMD_CPPFUNC double trace(const double2x2 x) { return ::simd_trace(x); } + static SIMD_CPPFUNC double trace(const double3x3 x) { return ::simd_trace(x); } + static SIMD_CPPFUNC double trace(const double4x4 x) { return ::simd_trace(x); } + + static SIMD_CPPFUNC double determinant(const double2x2 x) { return ::simd_determinant(x); } + static SIMD_CPPFUNC double determinant(const double3x3 x) { return ::simd_determinant(x); } + static SIMD_CPPFUNC double determinant(const double4x4 x) { return ::simd_determinant(x); } + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wgcc-compat" + static SIMD_CPPFUNC double2x2 inverse(const double2x2 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); } + static SIMD_CPPFUNC double3x3 inverse(const double3x3 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); } + static SIMD_CPPFUNC double4x4 inverse(const double4x4 x) __API_AVAILABLE(macos(10.10), ios(8.0), watchos(2.0), tvos(9.0)) { return ::simd_inverse(x); } +#pragma clang diagnostic pop + + static SIMD_CPPFUNC double2x2 operator*(const double a, const double2x2 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double2x3 operator*(const double a, const double2x3 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double2x4 operator*(const double a, const double2x4 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double3x2 operator*(const double a, const double3x2 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double3x3 operator*(const double a, const double3x3 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double3x4 operator*(const double a, const double3x4 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double4x2 operator*(const double a, const double4x2 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double4x3 operator*(const double a, const double4x3 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double4x4 operator*(const double a, const double4x4 x) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double2x2 operator*(const double2x2 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double2x3 operator*(const double2x3 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double2x4 operator*(const double2x4 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double3x2 operator*(const double3x2 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double3x3 operator*(const double3x3 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double3x4 operator*(const double3x4 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double4x2 operator*(const double4x2 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double4x3 
operator*(const double4x3 x, const double a) { return ::simd_mul(a, x); } + static SIMD_CPPFUNC double4x4 operator*(const double4x4 x, const double a) { return ::simd_mul(a, x); } + static SIMD_INLINE SIMD_NODEBUG double2x2& operator*=(double2x2& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double2x3& operator*=(double2x3& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double2x4& operator*=(double2x4& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double3x2& operator*=(double3x2& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double3x3& operator*=(double3x3& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double3x4& operator*=(double3x4& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double4x2& operator*=(double4x2& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double4x3& operator*=(double4x3& x, const double a) { x = ::simd_mul(a, x); return x; } + static SIMD_INLINE SIMD_NODEBUG double4x4& operator*=(double4x4& x, const double a) { x = ::simd_mul(a, x); return x; } + + static SIMD_CPPFUNC double2 operator*(const double2 x, const double2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3 operator*(const double2 x, const double3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4 operator*(const double2 x, const double4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2 operator*(const double3 x, const double2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3 operator*(const double3 x, const double3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4 operator*(const double3 x, const double4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2 operator*(const double4 x, const double2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3 operator*(const double4 x, const double3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4 operator*(const double4 x, const double4x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2 operator*(const double2x2 x, const double2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2 operator*(const double3x2 x, const double3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2 operator*(const double4x2 x, const double4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3 operator*(const double2x3 x, const double2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3 operator*(const double3x3 x, const double3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3 operator*(const double4x3 x, const double4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4 operator*(const double2x4 x, const double2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4 operator*(const double3x4 x, const double3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4 operator*(const double4x4 x, const double4 y) { return ::simd_mul(x, y); } + static SIMD_INLINE SIMD_NODEBUG double2& operator*=(double2& x, const double2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double3& operator*=(double3& x, const double3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double4& operator*=(double4& x, const double4x4 y) { x = ::simd_mul(x, y); return x; } + + static 
SIMD_CPPFUNC double2x2 operator*(const double2x2 x, const double2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x2 operator*(const double2x2 x, const double3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x2 operator*(const double2x2 x, const double4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x3 operator*(const double2x3 x, const double2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x3 operator*(const double2x3 x, const double3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x3 operator*(const double2x3 x, const double4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x4 operator*(const double2x4 x, const double2x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x4 operator*(const double2x4 x, const double3x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x4 operator*(const double2x4 x, const double4x2 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x2 operator*(const double3x2 x, const double2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x2 operator*(const double3x2 x, const double3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x2 operator*(const double3x2 x, const double4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x3 operator*(const double3x3 x, const double2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x3 operator*(const double3x3 x, const double3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x3 operator*(const double3x3 x, const double4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x4 operator*(const double3x4 x, const double2x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x4 operator*(const double3x4 x, const double3x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x4 operator*(const double3x4 x, const double4x3 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x2 operator*(const double4x2 x, const double2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x2 operator*(const double4x2 x, const double3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x2 operator*(const double4x2 x, const double4x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x3 operator*(const double4x3 x, const double2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x3 operator*(const double4x3 x, const double3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x3 operator*(const double4x3 x, const double4x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double2x4 operator*(const double4x4 x, const double2x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double3x4 operator*(const double4x4 x, const double3x4 y) { return ::simd_mul(x, y); } + static SIMD_CPPFUNC double4x4 operator*(const double4x4 x, const double4x4 y) { return ::simd_mul(x, y); } + static SIMD_INLINE SIMD_NODEBUG double2x2& operator*=(double2x2& x, const double2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double2x3& operator*=(double2x3& x, const double2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double2x4& operator*=(double2x4& x, const double2x2 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double3x2& operator*=(double3x2& x, const double3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double3x3& operator*=(double3x3& x, const double3x3 y) { x = ::simd_mul(x, y); return 
x; } + static SIMD_INLINE SIMD_NODEBUG double3x4& operator*=(double3x4& x, const double3x3 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double4x2& operator*=(double4x2& x, const double4x4 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double4x3& operator*=(double4x3& x, const double4x4 y) { x = ::simd_mul(x, y); return x; } + static SIMD_INLINE SIMD_NODEBUG double4x4& operator*=(double4x4& x, const double4x4 y) { x = ::simd_mul(x, y); return x; } + + static SIMD_CPPFUNC bool operator==(const double2x2& x, const double2x2& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double2x3& x, const double2x3& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double2x4& x, const double2x4& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double3x2& x, const double3x2& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double3x3& x, const double3x3& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double3x4& x, const double3x4& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double4x2& x, const double4x2& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double4x3& x, const double4x3& y) { return ::simd_equal(x, y); } + static SIMD_CPPFUNC bool operator==(const double4x4& x, const double4x4& y) { return ::simd_equal(x, y); } + + static SIMD_CPPFUNC bool operator!=(const double2x2& x, const double2x2& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double2x3& x, const double2x3& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double2x4& x, const double2x4& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double3x2& x, const double3x2& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double3x3& x, const double3x3& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double3x4& x, const double3x4& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double4x2& x, const double4x2& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double4x3& x, const double4x3& y) { return !(x == y); } + static SIMD_CPPFUNC bool operator!=(const double4x4& x, const double4x4& y) { return !(x == y); } + + static SIMD_CPPFUNC bool almost_equal_elements(const double2x2 x, const double2x2 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double2x3 x, const double2x3 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double2x4 x, const double2x4 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double3x2 x, const double3x2 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double3x3 x, const double3x3 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double3x4 x, const double3x4 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double4x2 x, const double4x2 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const 
double4x3 x, const double4x3 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements(const double4x4 x, const double4x4 y, const double tol) { return ::simd_almost_equal_elements(x, y, tol); } + + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double2x2 x, const double2x2 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double2x3 x, const double2x3 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double2x4 x, const double2x4 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double3x2 x, const double3x2 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double3x3 x, const double3x3 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double3x4 x, const double3x4 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double4x2 x, const double4x2 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double4x3 x, const double4x3 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } + static SIMD_CPPFUNC bool almost_equal_elements_relative(const double4x4 x, const double4x4 y, const double tol) { return ::simd_almost_equal_elements_relative(x, y, tol); } +} + +extern "C" { +#endif /* __cplusplus */ + +#pragma mark - Implementation + +static simd_float2x2 SIMD_CFUNC simd_diagonal_matrix(simd_float2 __x) { simd_float2x2 __r = { .columns[0] = {__x.x,0}, .columns[1] = {0,__x.y} }; return __r; } +static simd_double2x2 SIMD_CFUNC simd_diagonal_matrix(simd_double2 __x) { simd_double2x2 __r = { .columns[0] = {__x.x,0}, .columns[1] = {0,__x.y} }; return __r; } +static simd_float3x3 SIMD_CFUNC simd_diagonal_matrix(simd_float3 __x) { simd_float3x3 __r = { .columns[0] = {__x.x,0,0}, .columns[1] = {0,__x.y,0}, .columns[2] = {0,0,__x.z} }; return __r; } +static simd_double3x3 SIMD_CFUNC simd_diagonal_matrix(simd_double3 __x) { simd_double3x3 __r = { .columns[0] = {__x.x,0,0}, .columns[1] = {0,__x.y,0}, .columns[2] = {0,0,__x.z} }; return __r; } +static simd_float4x4 SIMD_CFUNC simd_diagonal_matrix(simd_float4 __x) { simd_float4x4 __r = { .columns[0] = {__x.x,0,0,0}, .columns[1] = {0,__x.y,0,0}, .columns[2] = {0,0,__x.z,0}, .columns[3] = {0,0,0,__x.w} }; return __r; } +static simd_double4x4 SIMD_CFUNC simd_diagonal_matrix(simd_double4 __x) { simd_double4x4 __r = { .columns[0] = {__x.x,0,0,0}, .columns[1] = {0,__x.y,0,0}, .columns[2] = {0,0,__x.z,0}, .columns[3] = {0,0,0,__x.w} }; return __r; } + +static simd_float2x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1) { simd_float2x2 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; } +static simd_float2x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1) { simd_float2x3 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; } +static simd_float2x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1) { simd_float2x4 __r = { .columns[0] = col0, .columns[1] = col1 
}; return __r; } +static simd_double2x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1) { simd_double2x2 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; } +static simd_double2x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1) { simd_double2x3 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; } +static simd_double2x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1) { simd_double2x4 __r = { .columns[0] = col0, .columns[1] = col1 }; return __r; } +static simd_float3x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1, simd_float2 col2) { simd_float3x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; } +static simd_float3x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1, simd_float3 col2) { simd_float3x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; } +static simd_float3x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1, simd_float4 col2) { simd_float3x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; } +static simd_double3x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2) { simd_double3x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; } +static simd_double3x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2) { simd_double3x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; } +static simd_double3x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2) { simd_double3x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2 }; return __r; } +static simd_float4x2 SIMD_CFUNC simd_matrix(simd_float2 col0, simd_float2 col1, simd_float2 col2, simd_float2 col3) { simd_float4x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; } +static simd_float4x3 SIMD_CFUNC simd_matrix(simd_float3 col0, simd_float3 col1, simd_float3 col2, simd_float3 col3) { simd_float4x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; } +static simd_float4x4 SIMD_CFUNC simd_matrix(simd_float4 col0, simd_float4 col1, simd_float4 col2, simd_float4 col3) { simd_float4x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; } +static simd_double4x2 SIMD_CFUNC simd_matrix(simd_double2 col0, simd_double2 col1, simd_double2 col2, simd_double2 col3) { simd_double4x2 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; } +static simd_double4x3 SIMD_CFUNC simd_matrix(simd_double3 col0, simd_double3 col1, simd_double3 col2, simd_double3 col3) { simd_double4x3 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; } +static simd_double4x4 SIMD_CFUNC simd_matrix(simd_double4 col0, simd_double4 col1, simd_double4 col2, simd_double4 col3) { simd_double4x4 __r = { .columns[0] = col0, .columns[1] = col1, .columns[2] = col2, .columns[3] = col3 }; return __r; } + +static simd_float2x2 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1) { return simd_transpose(simd_matrix(row0, row1)); } +static simd_float3x2 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1) { return simd_transpose(simd_matrix(row0, row1)); } +static simd_float4x2 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1) { return 
simd_transpose(simd_matrix(row0, row1)); } +static simd_double2x2 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1) { return simd_transpose(simd_matrix(row0, row1)); } +static simd_double3x2 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1) { return simd_transpose(simd_matrix(row0, row1)); } +static simd_double4x2 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1) { return simd_transpose(simd_matrix(row0, row1)); } +static simd_float2x3 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1, simd_float2 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); } +static simd_float3x3 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1, simd_float3 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); } +static simd_float4x3 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1, simd_float4 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); } +static simd_double2x3 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); } +static simd_double3x3 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); } +static simd_double4x3 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2) { return simd_transpose(simd_matrix(row0, row1, row2)); } +static simd_float2x4 SIMD_CFUNC simd_matrix_from_rows(simd_float2 row0, simd_float2 row1, simd_float2 row2, simd_float2 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); } +static simd_float3x4 SIMD_CFUNC simd_matrix_from_rows(simd_float3 row0, simd_float3 row1, simd_float3 row2, simd_float3 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); } +static simd_float4x4 SIMD_CFUNC simd_matrix_from_rows(simd_float4 row0, simd_float4 row1, simd_float4 row2, simd_float4 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); } +static simd_double2x4 SIMD_CFUNC simd_matrix_from_rows(simd_double2 row0, simd_double2 row1, simd_double2 row2, simd_double2 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); } +static simd_double3x4 SIMD_CFUNC simd_matrix_from_rows(simd_double3 row0, simd_double3 row1, simd_double3 row2, simd_double3 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); } +static simd_double4x4 SIMD_CFUNC simd_matrix_from_rows(simd_double4 row0, simd_double4 row1, simd_double4 row2, simd_double4 row3) { return simd_transpose(simd_matrix(row0, row1, row2, row3)); } + +static simd_float3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatf q) { + simd_float4x4 r = simd_matrix4x4(q); + return (simd_float3x3){ r.columns[0].xyz, r.columns[1].xyz, r.columns[2].xyz }; +} + +static simd_float4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatf q) { + simd_float4 v = q.vector; + simd_float4x4 r = { + .columns[0] = { v.x*v.x - v.y*v.y - v.z*v.z + v.w*v.w, + 2*(v.x*v.y + v.z*v.w), + 2*(v.x*v.z - v.y*v.w), 0 }, + .columns[1] = { 2*(v.x*v.y - v.z*v.w), + v.y*v.y - v.z*v.z + v.w*v.w - v.x*v.x, + 2*(v.y*v.z + v.x*v.w), 0 }, + .columns[2] = { 2*(v.z*v.x + v.y*v.w), + 2*(v.y*v.z - v.x*v.w), + v.z*v.z + v.w*v.w - v.x*v.x - v.y*v.y, 0 }, + .columns[3] = { 0, 0, 0, 1 } + }; + return r; +} + +static simd_double3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatd q) { + simd_double4x4 r = simd_matrix4x4(q); + return (simd_double3x3){ r.columns[0].xyz, r.columns[1].xyz, r.columns[2].xyz }; +} + 
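(Editor's note, not part of the vendored header: the constructors above come in column and row flavors — simd_matrix_from_rows is simply simd_matrix followed by simd_transpose — and the quaternion conversions expand q into the standard rotation matrix. A minimal usage sketch follows, assuming Apple's <simd/simd.h> umbrella header and clang, whose __attribute__((overloadable)) the C overloads rely on; simd_quaternion, simd_act, and simd_distance are the <simd/quaternion.h> and <simd/geometry.h> entry points, everything else is defined in this file.)

/* Illustrative usage only -- not shipped with the header above. */
#include <assert.h>
#include <math.h>
#include <simd/simd.h>

int main(void) {
    /* Column vs. row construction: from_rows == transpose(from_cols). */
    simd_float3 a = {1, 2, 3}, b = {4, 5, 6}, c = {7, 8, 9};
    simd_float3x3 cols = simd_matrix(a, b, c);           /* a, b, c are columns */
    simd_float3x3 rows = simd_matrix_from_rows(a, b, c); /* a, b, c are rows    */
    assert(simd_equal(simd_transpose(cols), rows));

    /* Rotating a vector by a quaternion agrees with the 3x3 matrix
     * produced by simd_matrix3x3(q) above. */
    simd_quatf q = simd_quaternion((float)M_PI_2, simd_make_float3(0, 0, 1));
    simd_float3 v = {1, 0, 0};
    simd_float3 by_quat = simd_act(q, v);                /* ~ (0, 1, 0) */
    simd_float3 by_mat  = simd_mul(simd_matrix3x3(q), v);
    assert(simd_distance(by_quat, by_mat) < 1e-5f);
    return 0;
}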
+static simd_double4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatd q) { + simd_double4 v = q.vector; + simd_double4x4 r = { + .columns[0] = { v.x*v.x - v.y*v.y - v.z*v.z + v.w*v.w, + 2*(v.x*v.y + v.z*v.w), + 2*(v.x*v.z - v.y*v.w), 0 }, + .columns[1] = { 2*(v.x*v.y - v.z*v.w), + v.y*v.y - v.z*v.z + v.w*v.w - v.x*v.x, + 2*(v.y*v.z + v.x*v.w), 0 }, + .columns[2] = { 2*(v.z*v.x + v.y*v.w), + 2*(v.y*v.z - v.x*v.w), + v.z*v.z + v.w*v.w - v.x*v.x - v.y*v.y, 0 }, + .columns[3] = { 0, 0, 0, 1 } + }; + return r; +} + +static simd_float2x2 SIMD_CFUNC matrix_scale(float __a, simd_float2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_float3x2 SIMD_CFUNC matrix_scale(float __a, simd_float3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_float4x2 SIMD_CFUNC matrix_scale(float __a, simd_float4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_float2x3 SIMD_CFUNC matrix_scale(float __a, simd_float2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_float3x3 SIMD_CFUNC matrix_scale(float __a, simd_float3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_float4x3 SIMD_CFUNC matrix_scale(float __a, simd_float4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_float2x4 SIMD_CFUNC matrix_scale(float __a, simd_float2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_float3x4 SIMD_CFUNC matrix_scale(float __a, simd_float3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_float4x4 SIMD_CFUNC matrix_scale(float __a, simd_float4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_double2x2 SIMD_CFUNC matrix_scale(double __a, simd_double2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_double3x2 SIMD_CFUNC matrix_scale(double __a, simd_double3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_double4x2 SIMD_CFUNC matrix_scale(double __a, simd_double4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_double2x3 SIMD_CFUNC matrix_scale(double __a, simd_double2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_double3x3 SIMD_CFUNC matrix_scale(double __a, simd_double3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_double4x3 SIMD_CFUNC matrix_scale(double __a, simd_double4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_double2x4 SIMD_CFUNC matrix_scale(double __a, simd_double2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_double3x4 SIMD_CFUNC matrix_scale(double __a, simd_double3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_double4x4 SIMD_CFUNC matrix_scale(double __a, simd_double4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } + +static simd_float2x2 SIMD_CFUNC simd_mul(float __a, simd_float2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= 
__a; return __x; } +static simd_float3x2 SIMD_CFUNC simd_mul(float __a, simd_float3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_float4x2 SIMD_CFUNC simd_mul(float __a, simd_float4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_float2x3 SIMD_CFUNC simd_mul(float __a, simd_float2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_float3x3 SIMD_CFUNC simd_mul(float __a, simd_float3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_float4x3 SIMD_CFUNC simd_mul(float __a, simd_float4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_float2x4 SIMD_CFUNC simd_mul(float __a, simd_float2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_float3x4 SIMD_CFUNC simd_mul(float __a, simd_float3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_float4x4 SIMD_CFUNC simd_mul(float __a, simd_float4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_double2x2 SIMD_CFUNC simd_mul(double __a, simd_double2x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_double3x2 SIMD_CFUNC simd_mul(double __a, simd_double3x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_double4x2 SIMD_CFUNC simd_mul(double __a, simd_double4x2 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_double2x3 SIMD_CFUNC simd_mul(double __a, simd_double2x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_double3x3 SIMD_CFUNC simd_mul(double __a, simd_double3x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_double4x3 SIMD_CFUNC simd_mul(double __a, simd_double4x3 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } +static simd_double2x4 SIMD_CFUNC simd_mul(double __a, simd_double2x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; return __x; } +static simd_double3x4 SIMD_CFUNC simd_mul(double __a, simd_double3x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; return __x; } +static simd_double4x4 SIMD_CFUNC simd_mul(double __a, simd_double4x4 __x) { __x.columns[0] *= __a; __x.columns[1] *= __a; __x.columns[2] *= __a; __x.columns[3] *= __a; return __x; } + +static simd_float2x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x2 __x, float __b, simd_float2x2 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + return __x; +} +static simd_float3x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x2 __x, float __b, simd_float3x2 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + return __x; +} +static simd_float4x2 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x2 __x, float __b, simd_float4x2 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + 
__x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3]; + return __x; +} +static simd_float2x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x3 __x, float __b, simd_float2x3 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + return __x; +} +static simd_float3x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x3 __x, float __b, simd_float3x3 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + return __x; +} +static simd_float4x3 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x3 __x, float __b, simd_float4x3 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3]; + return __x; +} +static simd_float2x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float2x4 __x, float __b, simd_float2x4 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + return __x; +} +static simd_float3x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float3x4 __x, float __b, simd_float3x4 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + return __x; +} +static simd_float4x4 SIMD_CFUNC simd_linear_combination(float __a, simd_float4x4 __x, float __b, simd_float4x4 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3]; + return __x; +} +static simd_double2x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x2 __x, double __b, simd_double2x2 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + return __x; +} +static simd_double3x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x2 __x, double __b, simd_double3x2 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + return __x; +} +static simd_double4x2 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x2 __x, double __b, simd_double4x2 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3]; + return __x; +} +static simd_double2x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x3 __x, double __b, simd_double2x3 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + return __x; +} +static simd_double3x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x3 __x, double __b, simd_double3x3 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + 
__b*__y.columns[2]; + return __x; +} +static simd_double4x3 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x3 __x, double __b, simd_double4x3 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3]; + return __x; +} +static simd_double2x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double2x4 __x, double __b, simd_double2x4 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + return __x; +} +static simd_double3x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double3x4 __x, double __b, simd_double3x4 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + return __x; +} +static simd_double4x4 SIMD_CFUNC simd_linear_combination(double __a, simd_double4x4 __x, double __b, simd_double4x4 __y) { + __x.columns[0] = __a*__x.columns[0] + __b*__y.columns[0]; + __x.columns[1] = __a*__x.columns[1] + __b*__y.columns[1]; + __x.columns[2] = __a*__x.columns[2] + __b*__y.columns[2]; + __x.columns[3] = __a*__x.columns[3] + __b*__y.columns[3]; + return __x; +} + +static simd_float2x2 SIMD_CFUNC simd_add(simd_float2x2 __x, simd_float2x2 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float3x2 SIMD_CFUNC simd_add(simd_float3x2 __x, simd_float3x2 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float4x2 SIMD_CFUNC simd_add(simd_float4x2 __x, simd_float4x2 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float2x3 SIMD_CFUNC simd_add(simd_float2x3 __x, simd_float2x3 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float3x3 SIMD_CFUNC simd_add(simd_float3x3 __x, simd_float3x3 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float4x3 SIMD_CFUNC simd_add(simd_float4x3 __x, simd_float4x3 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float2x4 SIMD_CFUNC simd_add(simd_float2x4 __x, simd_float2x4 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float3x4 SIMD_CFUNC simd_add(simd_float3x4 __x, simd_float3x4 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_float4x4 SIMD_CFUNC simd_add(simd_float4x4 __x, simd_float4x4 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double2x2 SIMD_CFUNC simd_add(simd_double2x2 __x, simd_double2x2 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double3x2 SIMD_CFUNC simd_add(simd_double3x2 __x, simd_double3x2 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double4x2 SIMD_CFUNC simd_add(simd_double4x2 __x, simd_double4x2 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double2x3 SIMD_CFUNC simd_add(simd_double2x3 __x, simd_double2x3 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double3x3 SIMD_CFUNC simd_add(simd_double3x3 __x, simd_double3x3 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double4x3 SIMD_CFUNC simd_add(simd_double4x3 __x, simd_double4x3 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double2x4 SIMD_CFUNC simd_add(simd_double2x4 __x, simd_double2x4 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double3x4 SIMD_CFUNC 
simd_add(simd_double3x4 __x, simd_double3x4 __y) { return simd_linear_combination(1, __x, 1, __y); } +static simd_double4x4 SIMD_CFUNC simd_add(simd_double4x4 __x, simd_double4x4 __y) { return simd_linear_combination(1, __x, 1, __y); } + +static simd_float2x2 SIMD_CFUNC simd_sub(simd_float2x2 __x, simd_float2x2 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float3x2 SIMD_CFUNC simd_sub(simd_float3x2 __x, simd_float3x2 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float4x2 SIMD_CFUNC simd_sub(simd_float4x2 __x, simd_float4x2 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float2x3 SIMD_CFUNC simd_sub(simd_float2x3 __x, simd_float2x3 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float3x3 SIMD_CFUNC simd_sub(simd_float3x3 __x, simd_float3x3 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float4x3 SIMD_CFUNC simd_sub(simd_float4x3 __x, simd_float4x3 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float2x4 SIMD_CFUNC simd_sub(simd_float2x4 __x, simd_float2x4 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float3x4 SIMD_CFUNC simd_sub(simd_float3x4 __x, simd_float3x4 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_float4x4 SIMD_CFUNC simd_sub(simd_float4x4 __x, simd_float4x4 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double2x2 SIMD_CFUNC simd_sub(simd_double2x2 __x, simd_double2x2 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double3x2 SIMD_CFUNC simd_sub(simd_double3x2 __x, simd_double3x2 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double4x2 SIMD_CFUNC simd_sub(simd_double4x2 __x, simd_double4x2 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double2x3 SIMD_CFUNC simd_sub(simd_double2x3 __x, simd_double2x3 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double3x3 SIMD_CFUNC simd_sub(simd_double3x3 __x, simd_double3x3 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double4x3 SIMD_CFUNC simd_sub(simd_double4x3 __x, simd_double4x3 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double2x4 SIMD_CFUNC simd_sub(simd_double2x4 __x, simd_double2x4 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double3x4 SIMD_CFUNC simd_sub(simd_double3x4 __x, simd_double3x4 __y) { return simd_linear_combination(1, __x, -1, __y); } +static simd_double4x4 SIMD_CFUNC simd_sub(simd_double4x4 __x, simd_double4x4 __y) { return simd_linear_combination(1, __x, -1, __y); } + +static simd_float2x2 SIMD_CFUNC simd_transpose(simd_float2x2 __x) { + simd_float4 __x0, __x1; + __x0.xy = __x.columns[0]; + __x1.xy = __x.columns[1]; +#if defined __SSE__ + simd_float4 __r01 = _mm_unpacklo_ps(__x0, __x1); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 __r01 = vzip1q_f32(__x0, __x1); +#else + simd_float4 __r01 = { __x0[0], __x1[0], __x0[1], __x1[1] }; +#endif + return simd_matrix(__r01.lo, __r01.hi); +} + +static simd_float3x2 SIMD_CFUNC simd_transpose(simd_float2x3 __x) { + simd_float4 __x0, __x1; + __x0.xyz = __x.columns[0]; + __x1.xyz = __x.columns[1]; +#if defined __SSE__ + simd_float4 __r01 = _mm_unpacklo_ps(__x0, __x1); + simd_float4 __r2x = _mm_unpackhi_ps(__x0, __x1); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 __r01 = vzip1q_f32(__x0, __x1); + simd_float4 __r2x = vzip2q_f32(__x0, __x1); +#else + simd_float4 __r01 = { 
__x0[0], __x1[0], __x0[1], __x1[1] }; + simd_float4 __r2x = { __x0[2], __x1[2] }; +#endif + return simd_matrix(__r01.lo, __r01.hi, __r2x.lo); +} + +static simd_float4x2 SIMD_CFUNC simd_transpose(simd_float2x4 __x) { +#if defined __SSE__ + simd_float4 __r01 = _mm_unpacklo_ps(__x.columns[0], __x.columns[1]); + simd_float4 __r23 = _mm_unpackhi_ps(__x.columns[0], __x.columns[1]); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 __r01 = vzip1q_f32(__x.columns[0], __x.columns[1]); + simd_float4 __r23 = vzip2q_f32(__x.columns[0], __x.columns[1]); +#else + simd_float4 __r01 = { __x.columns[0][0], __x.columns[1][0], __x.columns[0][1], __x.columns[1][1] }; + simd_float4 __r23 = { __x.columns[0][2], __x.columns[1][2], __x.columns[0][3], __x.columns[1][3] }; +#endif + return simd_matrix(__r01.lo, __r01.hi, __r23.lo, __r23.hi); +} + +static simd_float2x3 SIMD_CFUNC simd_transpose(simd_float3x2 __x) { + simd_float4 __x0, __x1, __x2; + __x0.xy = __x.columns[0]; + __x1.xy = __x.columns[1]; + __x2.xy = __x.columns[2]; +#if defined __SSE__ + simd_float4 __t = _mm_unpacklo_ps(__x0, __x1); + simd_float4 __r0 = _mm_shuffle_ps(__t,__x2,0xc4); + simd_float4 __r1 = _mm_shuffle_ps(__t,__x2,0xde); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 padding = { 0 }; + simd_float4 __t0 = vzip1q_f32(__x0,__x2); + simd_float4 __t1 = vzip1q_f32(__x1,padding); + simd_float4 __r0 = vzip1q_f32(__t0,__t1); + simd_float4 __r1 = vzip2q_f32(__t0,__t1); +#else + simd_float4 __r0 = { __x0[0], __x1[0], __x2[0] }; + simd_float4 __r1 = { __x0[1], __x1[1], __x2[1] }; +#endif + return simd_matrix(__r0.xyz, __r1.xyz); +} + +static simd_float3x3 SIMD_CFUNC simd_transpose(simd_float3x3 __x) { + simd_float4 __x0, __x1, __x2; + __x0.xyz = __x.columns[0]; + __x1.xyz = __x.columns[1]; + __x2.xyz = __x.columns[2]; +#if defined __SSE__ + simd_float4 __t0 = _mm_unpacklo_ps(__x0, __x1); + simd_float4 __t1 = _mm_unpackhi_ps(__x0, __x1); + simd_float4 __r0 = __t0; __r0.hi = __x2.lo; + simd_float4 __r1 = _mm_shuffle_ps(__t0, __x2, 0xde); + simd_float4 __r2 = __x2; __r2.lo = __t1.lo; +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 padding = { 0 }; + simd_float4 __t0 = vzip1q_f32(__x0,__x2); + simd_float4 __t1 = vzip2q_f32(__x0,__x2); + simd_float4 __t2 = vzip1q_f32(__x1,padding); + simd_float4 __t3 = vzip2q_f32(__x1,padding); + simd_float4 __r0 = vzip1q_f32(__t0,__t2); + simd_float4 __r1 = vzip2q_f32(__t0,__t2); + simd_float4 __r2 = vzip1q_f32(__t1,__t3); +#else + simd_float4 __r0 = {__x0[0], __x1[0], __x2[0]}; + simd_float4 __r1 = {__x0[1], __x1[1], __x2[1]}; + simd_float4 __r2 = {__x0[2], __x1[2], __x2[2]}; +#endif + return simd_matrix(__r0.xyz, __r1.xyz, __r2.xyz); +} + +static simd_float4x3 SIMD_CFUNC simd_transpose(simd_float3x4 __x) { +#if defined __SSE__ + simd_float4 __t0 = _mm_unpacklo_ps(__x.columns[0],__x.columns[1]); /* 00 10 01 11 */ + simd_float4 __t1 = _mm_unpackhi_ps(__x.columns[0],__x.columns[1]); /* 02 12 03 13 */ + simd_float4 __r0 = __t0; __r0.hi = __x.columns[2].lo; + simd_float4 __r1 = _mm_shuffle_ps(__t0, __x.columns[2], 0xde); + simd_float4 __r2 = __x.columns[2]; __r2.lo = __t1.lo; + simd_float4 __r3 = _mm_shuffle_ps(__t1, __x.columns[2], 0xfe); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 padding = { 0 }; + simd_float4 __t0 = vzip1q_f32(__x.columns[0],__x.columns[2]); + simd_float4 __t1 = vzip2q_f32(__x.columns[0],__x.columns[2]); + simd_float4 __t2 = vzip1q_f32(__x.columns[1],padding); + simd_float4 __t3 = vzip2q_f32(__x.columns[1],padding); + simd_float4 __r0 = 
vzip1q_f32(__t0,__t2); + simd_float4 __r1 = vzip2q_f32(__t0,__t2); + simd_float4 __r2 = vzip1q_f32(__t1,__t3); + simd_float4 __r3 = vzip2q_f32(__t1,__t3); +#else + simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0]}; + simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1]}; + simd_float4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2]}; + simd_float4 __r3 = {__x.columns[0][3], __x.columns[1][3], __x.columns[2][3]}; +#endif + return simd_matrix(__r0.xyz, __r1.xyz, __r2.xyz, __r3.xyz); +} + +static simd_float2x4 SIMD_CFUNC simd_transpose(simd_float4x2 __x) { + simd_float4 __x0, __x1, __x2, __x3; + __x0.xy = __x.columns[0]; + __x1.xy = __x.columns[1]; + __x2.xy = __x.columns[2]; + __x3.xy = __x.columns[3]; +#if defined __SSE__ + simd_float4 __t0 = _mm_unpacklo_ps(__x0,__x2); + simd_float4 __t1 = _mm_unpacklo_ps(__x1,__x3); + simd_float4 __r0 = _mm_unpacklo_ps(__t0,__t1); + simd_float4 __r1 = _mm_unpackhi_ps(__t0,__t1); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 __t0 = vzip1q_f32(__x0,__x2); + simd_float4 __t1 = vzip1q_f32(__x1,__x3); + simd_float4 __r0 = vzip1q_f32(__t0,__t1); + simd_float4 __r1 = vzip2q_f32(__t0,__t1); +#else + simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]}; + simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]}; +#endif + return simd_matrix(__r0,__r1); +} + +static simd_float3x4 SIMD_CFUNC simd_transpose(simd_float4x3 __x) { + simd_float4 __x0, __x1, __x2, __x3; + __x0.xyz = __x.columns[0]; + __x1.xyz = __x.columns[1]; + __x2.xyz = __x.columns[2]; + __x3.xyz = __x.columns[3]; +#if defined __SSE__ + simd_float4 __t0 = _mm_unpacklo_ps(__x0,__x2); + simd_float4 __t1 = _mm_unpackhi_ps(__x0,__x2); + simd_float4 __t2 = _mm_unpacklo_ps(__x1,__x3); + simd_float4 __t3 = _mm_unpackhi_ps(__x1,__x3); + simd_float4 __r0 = _mm_unpacklo_ps(__t0,__t2); + simd_float4 __r1 = _mm_unpackhi_ps(__t0,__t2); + simd_float4 __r2 = _mm_unpacklo_ps(__t1,__t3); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 __t0 = vzip1q_f32(__x0,__x2); + simd_float4 __t1 = vzip2q_f32(__x0,__x2); + simd_float4 __t2 = vzip1q_f32(__x1,__x3); + simd_float4 __t3 = vzip2q_f32(__x1,__x3); + simd_float4 __r0 = vzip1q_f32(__t0,__t2); + simd_float4 __r1 = vzip2q_f32(__t0,__t2); + simd_float4 __r2 = vzip1q_f32(__t1,__t3); +#else + simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]}; + simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]}; + simd_float4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2], __x.columns[3][2]}; +#endif + return simd_matrix(__r0,__r1,__r2); +} + +static simd_float4x4 SIMD_CFUNC simd_transpose(simd_float4x4 __x) { +#if defined __SSE__ + simd_float4 __t0 = _mm_unpacklo_ps(__x.columns[0],__x.columns[2]); + simd_float4 __t1 = _mm_unpackhi_ps(__x.columns[0],__x.columns[2]); + simd_float4 __t2 = _mm_unpacklo_ps(__x.columns[1],__x.columns[3]); + simd_float4 __t3 = _mm_unpackhi_ps(__x.columns[1],__x.columns[3]); + simd_float4 __r0 = _mm_unpacklo_ps(__t0,__t2); + simd_float4 __r1 = _mm_unpackhi_ps(__t0,__t2); + simd_float4 __r2 = _mm_unpacklo_ps(__t1,__t3); + simd_float4 __r3 = _mm_unpackhi_ps(__t1,__t3); +#elif defined __ARM_NEON__ && defined __arm64__ + simd_float4 __t0 = vzip1q_f32(__x.columns[0],__x.columns[2]); + simd_float4 __t1 = vzip2q_f32(__x.columns[0],__x.columns[2]); + simd_float4 __t2 = 
vzip1q_f32(__x.columns[1],__x.columns[3]); + simd_float4 __t3 = vzip2q_f32(__x.columns[1],__x.columns[3]); + simd_float4 __r0 = vzip1q_f32(__t0,__t2); + simd_float4 __r1 = vzip2q_f32(__t0,__t2); + simd_float4 __r2 = vzip1q_f32(__t1,__t3); + simd_float4 __r3 = vzip2q_f32(__t1,__t3); +#else + simd_float4 __r0 = {__x.columns[0][0], __x.columns[1][0], __x.columns[2][0], __x.columns[3][0]}; + simd_float4 __r1 = {__x.columns[0][1], __x.columns[1][1], __x.columns[2][1], __x.columns[3][1]}; + simd_float4 __r2 = {__x.columns[0][2], __x.columns[1][2], __x.columns[2][2], __x.columns[3][2]}; + simd_float4 __r3 = {__x.columns[0][3], __x.columns[1][3], __x.columns[2][3], __x.columns[3][3]}; +#endif + return simd_matrix(__r0,__r1,__r2,__r3); +} + +static simd_double2x2 SIMD_CFUNC simd_transpose(simd_double2x2 __x) { + simd_double2 __x0, __x1; + __x0 = __x.columns[0]; + __x1 = __x.columns[1]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double2 __r0 = vzip1q_f64(__x0, __x1); + simd_double2 __r1 = vzip2q_f64(__x0, __x1); +#else + simd_double2 __r0 = { __x0[0], __x1[0] }; + simd_double2 __r1 = { __x0[1], __x1[1] }; +#endif + return simd_matrix(__r0, __r1); +} + +static simd_double3x2 SIMD_CFUNC simd_transpose(simd_double2x3 __x) { + simd_double4 __x0, __x1; + __x0.xyz = __x.columns[0]; + __x1.xyz = __x.columns[1]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double2 __r0 = vzip1q_f64(__x0.lo,__x1.lo); + simd_double2 __r1 = vzip2q_f64(__x0.lo,__x1.lo); + simd_double2 __r2 = vzip1q_f64(__x0.hi,__x1.hi); +#else + simd_double2 __r0 = {__x0[0], __x1[0]}; + simd_double2 __r1 = {__x0[1], __x1[1]}; + simd_double2 __r2 = {__x0[2], __x1[2]}; +#endif + return simd_matrix(__r0,__r1,__r2); +} + +static simd_double4x2 SIMD_CFUNC simd_transpose(simd_double2x4 __x) { + simd_double4 __x0, __x1; + __x0 = __x.columns[0]; + __x1 = __x.columns[1]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double2 __r0 = vzip1q_f64(__x0.lo,__x1.lo); + simd_double2 __r1 = vzip2q_f64(__x0.lo,__x1.lo); + simd_double2 __r2 = vzip1q_f64(__x0.hi,__x1.hi); + simd_double2 __r3 = vzip2q_f64(__x0.hi,__x1.hi); +#else + simd_double2 __r0 = {__x0[0], __x1[0]}; + simd_double2 __r1 = {__x0[1], __x1[1]}; + simd_double2 __r2 = {__x0[2], __x1[2]}; + simd_double2 __r3 = {__x0[3], __x1[3]}; +#endif + return simd_matrix(__r0,__r1,__r2,__r3); +} + +static simd_double2x3 SIMD_CFUNC simd_transpose(simd_double3x2 __x) { + simd_double2 __x0, __x1, __x2; + __x0 = __x.columns[0]; + __x1 = __x.columns[1]; + __x2 = __x.columns[2]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double2 padding = { 0 }; + simd_double4 __r0,__r1; + __r0.lo = vzip1q_f64(__x0,__x1); + __r1.lo = vzip2q_f64(__x0,__x1); + __r0.hi = vzip1q_f64(__x2,padding); + __r1.hi = vzip2q_f64(__x2,padding); +#else + simd_double4 __r0 = {__x0[0], __x1[0], __x2[0]}; + simd_double4 __r1 = {__x0[1], __x1[1], __x2[1]}; +#endif + return simd_matrix(__r0.xyz,__r1.xyz); +} + +static simd_double3x3 SIMD_CFUNC simd_transpose(simd_double3x3 __x) { + simd_double4 __x0, __x1, __x2; + __x0.xyz = __x.columns[0]; + __x1.xyz = __x.columns[1]; + __x2.xyz = __x.columns[2]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double2 padding = { 0 }; + simd_double4 __r0,__r1,__r2; + __r0.lo = vzip1q_f64(__x0.lo,__x1.lo); + __r1.lo = vzip2q_f64(__x0.lo,__x1.lo); + __r2.lo = vzip1q_f64(__x0.hi,__x1.hi); + __r0.hi = vzip1q_f64(__x2.lo,padding); + __r1.hi = vzip2q_f64(__x2.lo,padding); + __r2.hi = vzip1q_f64(__x2.hi,padding); +#else + simd_double4 __r0 = {__x0[0], __x1[0], __x2[0]}; + simd_double4 
__r1 = {__x0[1], __x1[1], __x2[1]}; + simd_double4 __r2 = {__x0[2], __x1[2], __x2[2]}; +#endif + return simd_matrix(__r0.xyz,__r1.xyz,__r2.xyz); +} + +static simd_double4x3 SIMD_CFUNC simd_transpose(simd_double3x4 __x) { + simd_double4 __x0, __x1, __x2; + __x0 = __x.columns[0]; + __x1 = __x.columns[1]; + __x2 = __x.columns[2]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double2 padding = { 0 }; + simd_double4 __r0,__r1,__r2,__r3; + __r0.lo = vzip1q_f64(__x0.lo,__x1.lo); + __r1.lo = vzip2q_f64(__x0.lo,__x1.lo); + __r2.lo = vzip1q_f64(__x0.hi,__x1.hi); + __r3.lo = vzip2q_f64(__x0.hi,__x1.hi); + __r0.hi = vzip1q_f64(__x2.lo,padding); + __r1.hi = vzip2q_f64(__x2.lo,padding); + __r2.hi = vzip1q_f64(__x2.hi,padding); + __r3.hi = vzip2q_f64(__x2.hi,padding); +#else + simd_double4 __r0 = {__x0[0], __x1[0], __x2[0]}; + simd_double4 __r1 = {__x0[1], __x1[1], __x2[1]}; + simd_double4 __r2 = {__x0[2], __x1[2], __x2[2]}; + simd_double4 __r3 = {__x0[3], __x1[3], __x2[3]}; +#endif + return simd_matrix(__r0.xyz,__r1.xyz,__r2.xyz,__r3.xyz); +} + +static simd_double2x4 SIMD_CFUNC simd_transpose(simd_double4x2 __x) { + simd_double2 __x0, __x1, __x2, __x3; + __x0 = __x.columns[0]; + __x1 = __x.columns[1]; + __x2 = __x.columns[2]; + __x3 = __x.columns[3]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double4 __r0,__r1; + __r0.lo = vzip1q_f64(__x0,__x1); + __r1.lo = vzip2q_f64(__x0,__x1); + __r0.hi = vzip1q_f64(__x2,__x3); + __r1.hi = vzip2q_f64(__x2,__x3); +#else + simd_double4 __r0 = {__x0[0], __x1[0], __x2[0], __x3[0]}; + simd_double4 __r1 = {__x0[1], __x1[1], __x2[1], __x3[1]}; +#endif + return simd_matrix(__r0,__r1); +} + +static simd_double3x4 SIMD_CFUNC simd_transpose(simd_double4x3 __x) { + simd_double4 __x0, __x1, __x2, __x3; + __x0.xyz = __x.columns[0]; + __x1.xyz = __x.columns[1]; + __x2.xyz = __x.columns[2]; + __x3.xyz = __x.columns[3]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double4 __r0,__r1,__r2; + __r0.lo = vzip1q_f64(__x0.lo,__x1.lo); + __r1.lo = vzip2q_f64(__x0.lo,__x1.lo); + __r2.lo = vzip1q_f64(__x0.hi,__x1.hi); + __r0.hi = vzip1q_f64(__x2.lo,__x3.lo); + __r1.hi = vzip2q_f64(__x2.lo,__x3.lo); + __r2.hi = vzip1q_f64(__x2.hi,__x3.hi); +#else + simd_double4 __r0 = {__x0[0], __x1[0], __x2[0], __x3[0]}; + simd_double4 __r1 = {__x0[1], __x1[1], __x2[1], __x3[1]}; + simd_double4 __r2 = {__x0[2], __x1[2], __x2[2], __x3[2]}; +#endif + return simd_matrix(__r0,__r1,__r2); +} + +static simd_double4x4 SIMD_CFUNC simd_transpose(simd_double4x4 __x) { + simd_double4 __x0, __x1, __x2, __x3; + __x0 = __x.columns[0]; + __x1 = __x.columns[1]; + __x2 = __x.columns[2]; + __x3 = __x.columns[3]; +#if defined __ARM_NEON__ && defined __arm64__ + simd_double4 __r0,__r1,__r2,__r3; + __r0.lo = vzip1q_f64(__x0.lo,__x1.lo); + __r1.lo = vzip2q_f64(__x0.lo,__x1.lo); + __r2.lo = vzip1q_f64(__x0.hi,__x1.hi); + __r3.lo = vzip2q_f64(__x0.hi,__x1.hi); + __r0.hi = vzip1q_f64(__x2.lo,__x3.lo); + __r1.hi = vzip2q_f64(__x2.lo,__x3.lo); + __r2.hi = vzip1q_f64(__x2.hi,__x3.hi); + __r3.hi = vzip2q_f64(__x2.hi,__x3.hi); +#else + simd_double4 __r0 = {__x0[0], __x1[0], __x2[0], __x3[0]}; + simd_double4 __r1 = {__x0[1], __x1[1], __x2[1], __x3[1]}; + simd_double4 __r2 = {__x0[2], __x1[2], __x2[2], __x3[2]}; + simd_double4 __r3 = {__x0[3], __x1[3], __x2[3], __x3[3]}; +#endif + return simd_matrix(__r0,__r1,__r2,__r3); +} + +static simd_float3 SIMD_CFUNC __rotate1( simd_float3 __x) { return __builtin_shufflevector(__x,__x,1,2,0); } +static simd_float3 SIMD_CFUNC __rotate2( simd_float3 __x) { return 
__builtin_shufflevector(__x,__x,2,0,1); } +static simd_float4 SIMD_CFUNC __rotate1( simd_float4 __x) { return __builtin_shufflevector(__x,__x,1,2,3,0); } +static simd_float4 SIMD_CFUNC __rotate2( simd_float4 __x) { return __builtin_shufflevector(__x,__x,2,3,0,1); } +static simd_float4 SIMD_CFUNC __rotate3( simd_float4 __x) { return __builtin_shufflevector(__x,__x,3,0,1,2); } +static simd_double3 SIMD_CFUNC __rotate1(simd_double3 __x) { return __builtin_shufflevector(__x,__x,1,2,0); } +static simd_double3 SIMD_CFUNC __rotate2(simd_double3 __x) { return __builtin_shufflevector(__x,__x,2,0,1); } +static simd_double4 SIMD_CFUNC __rotate1(simd_double4 __x) { return __builtin_shufflevector(__x,__x,1,2,3,0); } +static simd_double4 SIMD_CFUNC __rotate2(simd_double4 __x) { return __builtin_shufflevector(__x,__x,2,3,0,1); } +static simd_double4 SIMD_CFUNC __rotate3(simd_double4 __x) { return __builtin_shufflevector(__x,__x,3,0,1,2); } + +static float SIMD_CFUNC simd_trace( simd_float2x2 __x) { return __x.columns[0][0] + __x.columns[1][1]; } +static double SIMD_CFUNC simd_trace(simd_double2x2 __x) { return __x.columns[0][0] + __x.columns[1][1]; } +static float SIMD_CFUNC simd_trace( simd_float3x3 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2]; } +static double SIMD_CFUNC simd_trace(simd_double3x3 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2]; } +static float SIMD_CFUNC simd_trace( simd_float4x4 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2] + __x.columns[3][3]; } +static double SIMD_CFUNC simd_trace(simd_double4x4 __x) { return __x.columns[0][0] + __x.columns[1][1] + __x.columns[2][2] + __x.columns[3][3]; } + +static float SIMD_CFUNC simd_determinant( simd_float2x2 __x) { return __x.columns[0][0]*__x.columns[1][1] - __x.columns[0][1]*__x.columns[1][0]; } +static double SIMD_CFUNC simd_determinant(simd_double2x2 __x) { return __x.columns[0][0]*__x.columns[1][1] - __x.columns[0][1]*__x.columns[1][0]; } +static float SIMD_CFUNC simd_determinant( simd_float3x3 __x) { return simd_reduce_add(__x.columns[0]*(__rotate1(__x.columns[1])*__rotate2(__x.columns[2]) - __rotate2(__x.columns[1])*__rotate1(__x.columns[2]))); } +static double SIMD_CFUNC simd_determinant(simd_double3x3 __x) { return simd_reduce_add(__x.columns[0]*(__rotate1(__x.columns[1])*__rotate2(__x.columns[2]) - __rotate2(__x.columns[1])*__rotate1(__x.columns[2]))); } +static float SIMD_CFUNC simd_determinant( simd_float4x4 __x) { + simd_float4 codet = __x.columns[0]*(__rotate1(__x.columns[1])*(__rotate2(__x.columns[2])*__rotate3(__x.columns[3])-__rotate3(__x.columns[2])*__rotate2(__x.columns[3])) + + __rotate2(__x.columns[1])*(__rotate3(__x.columns[2])*__rotate1(__x.columns[3])-__rotate1(__x.columns[2])*__rotate3(__x.columns[3])) + + __rotate3(__x.columns[1])*(__rotate1(__x.columns[2])*__rotate2(__x.columns[3])-__rotate2(__x.columns[2])*__rotate1(__x.columns[3]))); + return simd_reduce_add(codet.even - codet.odd); +} +static double SIMD_CFUNC simd_determinant(simd_double4x4 __x) { + simd_double4 codet = __x.columns[0]*(__rotate1(__x.columns[1])*(__rotate2(__x.columns[2])*__rotate3(__x.columns[3])-__rotate3(__x.columns[2])*__rotate2(__x.columns[3])) + + __rotate2(__x.columns[1])*(__rotate3(__x.columns[2])*__rotate1(__x.columns[3])-__rotate1(__x.columns[2])*__rotate3(__x.columns[3])) + + __rotate3(__x.columns[1])*(__rotate1(__x.columns[2])*__rotate2(__x.columns[3])-__rotate2(__x.columns[2])*__rotate1(__x.columns[3]))); + return simd_reduce_add(codet.even - codet.odd); +} + 
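(Editor's note, not part of the vendored header: simd_determinant avoids scalar cofactor expansion — the __rotateN helpers cyclically permute a column's lanes with __builtin_shufflevector, so the 3x3 case reduces to one lane-wise triple product folded by simd_reduce_add, and the 4x4 case to a vector of cofactor products. A quick sanity check, again a sketch assuming <simd/simd.h>; simd_diagonal_matrix, simd_trace, simd_determinant, simd_sub, and matrix_scale are all defined in this file.)

/* Illustrative usage only -- not shipped with the header above. */
#include <assert.h>
#include <math.h>
#include <simd/simd.h>

int main(void) {
    /* For a diagonal matrix the trace is the sum of the diagonal and the
     * determinant is its product. */
    simd_float3x3 m = simd_diagonal_matrix(simd_make_float3(2, 3, 4));
    assert(fabsf(simd_trace(m) - 9.0f) < 1e-6f);
    assert(fabsf(simd_determinant(m) - 24.0f) < 1e-6f);

    /* simd_sub(x, y) is simd_linear_combination(1, x, -1, y), so x - x
     * is exactly the zero matrix. */
    assert(simd_equal(simd_sub(m, m), matrix_scale(0.0f, m)));
    return 0;
}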
+static simd_float2x2 SIMD_CFUNC simd_inverse( simd_float2x2 __x) { return __invert_f2(__x); } +static simd_float3x3 SIMD_CFUNC simd_inverse( simd_float3x3 __x) { return __invert_f3(__x); } +static simd_float4x4 SIMD_CFUNC simd_inverse( simd_float4x4 __x) { return __invert_f4(__x); } +static simd_double2x2 SIMD_CFUNC simd_inverse(simd_double2x2 __x) { return __invert_d2(__x); } +static simd_double3x3 SIMD_CFUNC simd_inverse(simd_double3x3 __x) { return __invert_d3(__x); } +static simd_double4x4 SIMD_CFUNC simd_inverse(simd_double4x4 __x) { return __invert_d4(__x); } + +static simd_float2 SIMD_CFUNC simd_mul( simd_float2x2 __x, simd_float2 __y) { simd_float2 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); return __r; } +static simd_float3 SIMD_CFUNC simd_mul( simd_float2x3 __x, simd_float2 __y) { simd_float3 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); return __r; } +static simd_float4 SIMD_CFUNC simd_mul( simd_float2x4 __x, simd_float2 __y) { simd_float4 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); return __r; } +static simd_float2 SIMD_CFUNC simd_mul( simd_float3x2 __x, simd_float3 __y) { simd_float2 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); return __r; } +static simd_float3 SIMD_CFUNC simd_mul( simd_float3x3 __x, simd_float3 __y) { simd_float3 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); return __r; } +static simd_float4 SIMD_CFUNC simd_mul( simd_float3x4 __x, simd_float3 __y) { simd_float4 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); return __r; } +static simd_float2 SIMD_CFUNC simd_mul( simd_float4x2 __x, simd_float4 __y) { simd_float2 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); __r = simd_muladd( __x.columns[3], __y[3],__r); return __r; } +static simd_float3 SIMD_CFUNC simd_mul( simd_float4x3 __x, simd_float4 __y) { simd_float3 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); __r = simd_muladd( __x.columns[3], __y[3],__r); return __r; } +static simd_float4 SIMD_CFUNC simd_mul( simd_float4x4 __x, simd_float4 __y) { simd_float4 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); __r = simd_muladd( __x.columns[3], __y[3],__r); return __r; } +static simd_double2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2 __y) { simd_double2 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); return __r; } +static simd_double3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2 __y) { simd_double3 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); return __r; } +static simd_double4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2 __y) { simd_double4 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); return __r; } +static simd_double2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3 __y) { simd_double2 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); return __r; } +static simd_double3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3 __y) { simd_double3 __r = 
__x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); return __r; } +static simd_double4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3 __y) { simd_double4 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); return __r; } +static simd_double2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4 __y) { simd_double2 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); __r = simd_muladd( __x.columns[3], __y[3],__r); return __r; } +static simd_double3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4 __y) { simd_double3 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); __r = simd_muladd( __x.columns[3], __y[3],__r); return __r; } +static simd_double4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4 __y) { simd_double4 __r = __x.columns[0]*__y[0]; __r = simd_muladd( __x.columns[1], __y[1],__r); __r = simd_muladd( __x.columns[2], __y[2],__r); __r = simd_muladd( __x.columns[3], __y[3],__r); return __r; } + +static simd_float2 SIMD_CFUNC simd_mul( simd_float2 __x, simd_float2x2 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float3 SIMD_CFUNC simd_mul( simd_float2 __x, simd_float3x2 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float4 SIMD_CFUNC simd_mul( simd_float2 __x, simd_float4x2 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float2 SIMD_CFUNC simd_mul( simd_float3 __x, simd_float2x3 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float3 SIMD_CFUNC simd_mul( simd_float3 __x, simd_float3x3 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float4 SIMD_CFUNC simd_mul( simd_float3 __x, simd_float4x3 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float2 SIMD_CFUNC simd_mul( simd_float4 __x, simd_float2x4 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float3 SIMD_CFUNC simd_mul( simd_float4 __x, simd_float3x4 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_float4 SIMD_CFUNC simd_mul( simd_float4 __x, simd_float4x4 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double2 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double2x2 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double3 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double3x2 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double4 SIMD_CFUNC simd_mul(simd_double2 __x, simd_double4x2 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double2 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double2x3 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double3 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double3x3 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double4 SIMD_CFUNC simd_mul(simd_double3 __x, simd_double4x3 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double2 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double2x4 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double3 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double3x4 __y) { return simd_mul(simd_transpose(__y), __x); } +static simd_double4 SIMD_CFUNC simd_mul(simd_double4 __x, simd_double4x4 __y) { return simd_mul(simd_transpose(__y), __x); } + +static simd_float2x2 SIMD_CFUNC simd_mul( simd_float2x2 __x, simd_float2x2 __y) { 
simd_float2x2 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double2x2 __y) { simd_double2x2 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x3 SIMD_CFUNC simd_mul( simd_float2x3 __x, simd_float2x2 __y) { simd_float2x3 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double2x2 __y) { simd_double2x3 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x4 SIMD_CFUNC simd_mul( simd_float2x4 __x, simd_float2x2 __y) { simd_float2x4 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double2x2 __y) { simd_double2x4 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x2 SIMD_CFUNC simd_mul( simd_float3x2 __x, simd_float2x3 __y) { simd_float2x2 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double2x3 __y) { simd_double2x2 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x3 SIMD_CFUNC simd_mul( simd_float3x3 __x, simd_float2x3 __y) { simd_float2x3 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double2x3 __y) { simd_double2x3 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x4 SIMD_CFUNC simd_mul( simd_float3x4 __x, simd_float2x3 __y) { simd_float2x4 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double2x3 __y) { simd_double2x4 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x2 SIMD_CFUNC simd_mul( simd_float4x2 __x, simd_float2x4 __y) { simd_float2x2 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double2x4 __y) { simd_double2x2 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x3 SIMD_CFUNC simd_mul( simd_float4x3 __x, simd_float2x4 __y) { simd_float2x3 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double2x4 __y) { simd_double2x3 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float2x4 SIMD_CFUNC simd_mul( simd_float4x4 __x, simd_float2x4 __y) { simd_float2x4 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double2x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double2x4 __y) { simd_double2x4 __r; for (int i=0; i<2; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } + +static simd_float3x2 SIMD_CFUNC simd_mul( simd_float2x2 __x, simd_float3x2 __y) { simd_float3x2 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x2 SIMD_CFUNC 
simd_mul(simd_double2x2 __x, simd_double3x2 __y) { simd_double3x2 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x3 SIMD_CFUNC simd_mul( simd_float2x3 __x, simd_float3x2 __y) { simd_float3x3 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double3x2 __y) { simd_double3x3 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x4 SIMD_CFUNC simd_mul( simd_float2x4 __x, simd_float3x2 __y) { simd_float3x4 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double3x2 __y) { simd_double3x4 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x2 SIMD_CFUNC simd_mul( simd_float3x2 __x, simd_float3x3 __y) { simd_float3x2 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double3x3 __y) { simd_double3x2 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x3 SIMD_CFUNC simd_mul( simd_float3x3 __x, simd_float3x3 __y) { simd_float3x3 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double3x3 __y) { simd_double3x3 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x4 SIMD_CFUNC simd_mul( simd_float3x4 __x, simd_float3x3 __y) { simd_float3x4 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double3x3 __y) { simd_double3x4 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x2 SIMD_CFUNC simd_mul( simd_float4x2 __x, simd_float3x4 __y) { simd_float3x2 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double3x4 __y) { simd_double3x2 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x3 SIMD_CFUNC simd_mul( simd_float4x3 __x, simd_float3x4 __y) { simd_float3x3 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double3x4 __y) { simd_double3x3 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float3x4 SIMD_CFUNC simd_mul( simd_float4x4 __x, simd_float3x4 __y) { simd_float3x4 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double3x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double3x4 __y) { simd_double3x4 __r; for (int i=0; i<3; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } + +static simd_float4x2 SIMD_CFUNC simd_mul( simd_float2x2 __x, simd_float4x2 __y) { simd_float4x2 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x2 SIMD_CFUNC simd_mul(simd_double2x2 __x, simd_double4x2 __y) { simd_double4x2 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, 
__y.columns[i]); return __r; } +static simd_float4x3 SIMD_CFUNC simd_mul( simd_float2x3 __x, simd_float4x2 __y) { simd_float4x3 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x3 SIMD_CFUNC simd_mul(simd_double2x3 __x, simd_double4x2 __y) { simd_double4x3 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x4 SIMD_CFUNC simd_mul( simd_float2x4 __x, simd_float4x2 __y) { simd_float4x4 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x4 SIMD_CFUNC simd_mul(simd_double2x4 __x, simd_double4x2 __y) { simd_double4x4 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x2 SIMD_CFUNC simd_mul( simd_float3x2 __x, simd_float4x3 __y) { simd_float4x2 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x2 SIMD_CFUNC simd_mul(simd_double3x2 __x, simd_double4x3 __y) { simd_double4x2 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x3 SIMD_CFUNC simd_mul( simd_float3x3 __x, simd_float4x3 __y) { simd_float4x3 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x3 SIMD_CFUNC simd_mul(simd_double3x3 __x, simd_double4x3 __y) { simd_double4x3 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x4 SIMD_CFUNC simd_mul( simd_float3x4 __x, simd_float4x3 __y) { simd_float4x4 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x4 SIMD_CFUNC simd_mul(simd_double3x4 __x, simd_double4x3 __y) { simd_double4x4 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x2 SIMD_CFUNC simd_mul( simd_float4x2 __x, simd_float4x4 __y) { simd_float4x2 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x2 SIMD_CFUNC simd_mul(simd_double4x2 __x, simd_double4x4 __y) { simd_double4x2 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x3 SIMD_CFUNC simd_mul( simd_float4x3 __x, simd_float4x4 __y) { simd_float4x3 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x3 SIMD_CFUNC simd_mul(simd_double4x3 __x, simd_double4x4 __y) { simd_double4x3 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_float4x4 SIMD_CFUNC simd_mul( simd_float4x4 __x, simd_float4x4 __y) { simd_float4x4 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } +static simd_double4x4 SIMD_CFUNC simd_mul(simd_double4x4 __x, simd_double4x4 __y) { simd_double4x4 __r; for (int i=0; i<4; ++i) __r.columns[i] = simd_mul(__x, __y.columns[i]); return __r; } + +static simd_float2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x, simd_float2 __y) { return simd_mul(__x, __y); } +static simd_float3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x, simd_float2 __y) { return simd_mul(__x, __y); } +static simd_float4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x, simd_float2 __y) { return simd_mul(__x, __y); } +static simd_float2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x, simd_float3 __y) { return simd_mul(__x, __y); } +static simd_float3 
SIMD_CFUNC matrix_multiply( simd_float3x3 __x, simd_float3 __y) { return simd_mul(__x, __y); } +static simd_float4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x, simd_float3 __y) { return simd_mul(__x, __y); } +static simd_float2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x, simd_float4 __y) { return simd_mul(__x, __y); } +static simd_float3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x, simd_float4 __y) { return simd_mul(__x, __y); } +static simd_float4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x, simd_float4 __y) { return simd_mul(__x, __y); } +static simd_double2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double2 __y) { return simd_mul(__x, __y); } +static simd_double3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double2 __y) { return simd_mul(__x, __y); } +static simd_double4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double2 __y) { return simd_mul(__x, __y); } +static simd_double2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double3 __y) { return simd_mul(__x, __y); } +static simd_double3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double3 __y) { return simd_mul(__x, __y); } +static simd_double4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double3 __y) { return simd_mul(__x, __y); } +static simd_double2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double4 __y) { return simd_mul(__x, __y); } +static simd_double3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double4 __y) { return simd_mul(__x, __y); } +static simd_double4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double4 __y) { return simd_mul(__x, __y); } + +static simd_float2 SIMD_CFUNC matrix_multiply( simd_float2 __x, simd_float2x2 __y) { return simd_mul(__x, __y); } +static simd_float3 SIMD_CFUNC matrix_multiply( simd_float2 __x, simd_float3x2 __y) { return simd_mul(__x, __y); } +static simd_float4 SIMD_CFUNC matrix_multiply( simd_float2 __x, simd_float4x2 __y) { return simd_mul(__x, __y); } +static simd_float2 SIMD_CFUNC matrix_multiply( simd_float3 __x, simd_float2x3 __y) { return simd_mul(__x, __y); } +static simd_float3 SIMD_CFUNC matrix_multiply( simd_float3 __x, simd_float3x3 __y) { return simd_mul(__x, __y); } +static simd_float4 SIMD_CFUNC matrix_multiply( simd_float3 __x, simd_float4x3 __y) { return simd_mul(__x, __y); } +static simd_float2 SIMD_CFUNC matrix_multiply( simd_float4 __x, simd_float2x4 __y) { return simd_mul(__x, __y); } +static simd_float3 SIMD_CFUNC matrix_multiply( simd_float4 __x, simd_float3x4 __y) { return simd_mul(__x, __y); } +static simd_float4 SIMD_CFUNC matrix_multiply( simd_float4 __x, simd_float4x4 __y) { return simd_mul(__x, __y); } +static simd_double2 SIMD_CFUNC matrix_multiply(simd_double2 __x, simd_double2x2 __y) { return simd_mul(__x, __y); } +static simd_double3 SIMD_CFUNC matrix_multiply(simd_double2 __x, simd_double3x2 __y) { return simd_mul(__x, __y); } +static simd_double4 SIMD_CFUNC matrix_multiply(simd_double2 __x, simd_double4x2 __y) { return simd_mul(__x, __y); } +static simd_double2 SIMD_CFUNC matrix_multiply(simd_double3 __x, simd_double2x3 __y) { return simd_mul(__x, __y); } +static simd_double3 SIMD_CFUNC matrix_multiply(simd_double3 __x, simd_double3x3 __y) { return simd_mul(__x, __y); } +static simd_double4 SIMD_CFUNC matrix_multiply(simd_double3 __x, simd_double4x3 __y) { return simd_mul(__x, __y); } +static simd_double2 SIMD_CFUNC matrix_multiply(simd_double4 __x, simd_double2x4 __y) { return simd_mul(__x, __y); } +static simd_double3 SIMD_CFUNC matrix_multiply(simd_double4 __x, simd_double3x4 __y) { 
return simd_mul(__x, __y); } +static simd_double4 SIMD_CFUNC matrix_multiply(simd_double4 __x, simd_double4x4 __y) { return simd_mul(__x, __y); } + +static simd_float2x2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x, simd_float2x2 __y) { return simd_mul(__x, __y); } +static simd_double2x2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double2x2 __y) { return simd_mul(__x, __y); } +static simd_float2x3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x, simd_float2x2 __y) { return simd_mul(__x, __y); } +static simd_double2x3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double2x2 __y) { return simd_mul(__x, __y); } +static simd_float2x4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x, simd_float2x2 __y) { return simd_mul(__x, __y); } +static simd_double2x4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double2x2 __y) { return simd_mul(__x, __y); } +static simd_float2x2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x, simd_float2x3 __y) { return simd_mul(__x, __y); } +static simd_double2x2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double2x3 __y) { return simd_mul(__x, __y); } +static simd_float2x3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x, simd_float2x3 __y) { return simd_mul(__x, __y); } +static simd_double2x3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double2x3 __y) { return simd_mul(__x, __y); } +static simd_float2x4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x, simd_float2x3 __y) { return simd_mul(__x, __y); } +static simd_double2x4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double2x3 __y) { return simd_mul(__x, __y); } +static simd_float2x2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x, simd_float2x4 __y) { return simd_mul(__x, __y); } +static simd_double2x2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double2x4 __y) { return simd_mul(__x, __y); } +static simd_float2x3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x, simd_float2x4 __y) { return simd_mul(__x, __y); } +static simd_double2x3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double2x4 __y) { return simd_mul(__x, __y); } +static simd_float2x4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x, simd_float2x4 __y) { return simd_mul(__x, __y); } +static simd_double2x4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double2x4 __y) { return simd_mul(__x, __y); } + +static simd_float3x2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x, simd_float3x2 __y) { return simd_mul(__x, __y); } +static simd_double3x2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double3x2 __y) { return simd_mul(__x, __y); } +static simd_float3x3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x, simd_float3x2 __y) { return simd_mul(__x, __y); } +static simd_double3x3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double3x2 __y) { return simd_mul(__x, __y); } +static simd_float3x4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x, simd_float3x2 __y) { return simd_mul(__x, __y); } +static simd_double3x4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double3x2 __y) { return simd_mul(__x, __y); } +static simd_float3x2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x, simd_float3x3 __y) { return simd_mul(__x, __y); } +static simd_double3x2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double3x3 __y) { return simd_mul(__x, __y); } +static simd_float3x3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x, simd_float3x3 __y) { return simd_mul(__x, __y); } +static simd_double3x3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double3x3 __y) { return simd_mul(__x, __y); } +static simd_float3x4 SIMD_CFUNC 
matrix_multiply( simd_float3x4 __x, simd_float3x3 __y) { return simd_mul(__x, __y); } +static simd_double3x4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double3x3 __y) { return simd_mul(__x, __y); } +static simd_float3x2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x, simd_float3x4 __y) { return simd_mul(__x, __y); } +static simd_double3x2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double3x4 __y) { return simd_mul(__x, __y); } +static simd_float3x3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x, simd_float3x4 __y) { return simd_mul(__x, __y); } +static simd_double3x3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double3x4 __y) { return simd_mul(__x, __y); } +static simd_float3x4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x, simd_float3x4 __y) { return simd_mul(__x, __y); } +static simd_double3x4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double3x4 __y) { return simd_mul(__x, __y); } + +static simd_float4x2 SIMD_CFUNC matrix_multiply( simd_float2x2 __x, simd_float4x2 __y) { return simd_mul(__x, __y); } +static simd_double4x2 SIMD_CFUNC matrix_multiply(simd_double2x2 __x, simd_double4x2 __y) { return simd_mul(__x, __y); } +static simd_float4x3 SIMD_CFUNC matrix_multiply( simd_float2x3 __x, simd_float4x2 __y) { return simd_mul(__x, __y); } +static simd_double4x3 SIMD_CFUNC matrix_multiply(simd_double2x3 __x, simd_double4x2 __y) { return simd_mul(__x, __y); } +static simd_float4x4 SIMD_CFUNC matrix_multiply( simd_float2x4 __x, simd_float4x2 __y) { return simd_mul(__x, __y); } +static simd_double4x4 SIMD_CFUNC matrix_multiply(simd_double2x4 __x, simd_double4x2 __y) { return simd_mul(__x, __y); } +static simd_float4x2 SIMD_CFUNC matrix_multiply( simd_float3x2 __x, simd_float4x3 __y) { return simd_mul(__x, __y); } +static simd_double4x2 SIMD_CFUNC matrix_multiply(simd_double3x2 __x, simd_double4x3 __y) { return simd_mul(__x, __y); } +static simd_float4x3 SIMD_CFUNC matrix_multiply( simd_float3x3 __x, simd_float4x3 __y) { return simd_mul(__x, __y); } +static simd_double4x3 SIMD_CFUNC matrix_multiply(simd_double3x3 __x, simd_double4x3 __y) { return simd_mul(__x, __y); } +static simd_float4x4 SIMD_CFUNC matrix_multiply( simd_float3x4 __x, simd_float4x3 __y) { return simd_mul(__x, __y); } +static simd_double4x4 SIMD_CFUNC matrix_multiply(simd_double3x4 __x, simd_double4x3 __y) { return simd_mul(__x, __y); } +static simd_float4x2 SIMD_CFUNC matrix_multiply( simd_float4x2 __x, simd_float4x4 __y) { return simd_mul(__x, __y); } +static simd_double4x2 SIMD_CFUNC matrix_multiply(simd_double4x2 __x, simd_double4x4 __y) { return simd_mul(__x, __y); } +static simd_float4x3 SIMD_CFUNC matrix_multiply( simd_float4x3 __x, simd_float4x4 __y) { return simd_mul(__x, __y); } +static simd_double4x3 SIMD_CFUNC matrix_multiply(simd_double4x3 __x, simd_double4x4 __y) { return simd_mul(__x, __y); } +static simd_float4x4 SIMD_CFUNC matrix_multiply( simd_float4x4 __x, simd_float4x4 __y) { return simd_mul(__x, __y); } +static simd_double4x4 SIMD_CFUNC matrix_multiply(simd_double4x4 __x, simd_double4x4 __y) { return simd_mul(__x, __y); } + +static simd_bool SIMD_CFUNC simd_equal(simd_float2x2 __x, simd_float2x2 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float2x3 __x, simd_float2x3 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float2x4 __x, simd_float2x4 __y) { + return 
simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float3x2 __x, simd_float3x2 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float3x3 __x, simd_float3x3 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float3x4 __x, simd_float3x4 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float4x2 __x, simd_float4x2 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2]) & + (__x.columns[3] == __y.columns[3])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float4x3 __x, simd_float4x3 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2]) & + (__x.columns[3] == __y.columns[3])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_float4x4 __x, simd_float4x4 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2]) & + (__x.columns[3] == __y.columns[3])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double2x2 __x, simd_double2x2 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double2x3 __x, simd_double2x3 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double2x4 __x, simd_double2x4 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double3x2 __x, simd_double3x2 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double3x3 __x, simd_double3x3 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double3x4 __x, simd_double3x4 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double4x2 __x, simd_double4x2 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2]) & + (__x.columns[3] == __y.columns[3])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double4x3 __x, simd_double4x3 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2]) & + (__x.columns[3] == __y.columns[3])); +} +static simd_bool SIMD_CFUNC simd_equal(simd_double4x4 __x, simd_double4x4 __y) { + return simd_all((__x.columns[0] == __y.columns[0]) & + (__x.columns[1] == __y.columns[1]) & + (__x.columns[2] == __y.columns[2]) & + (__x.columns[3] == __y.columns[3])); +} + +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x2 __x, simd_float2x2 
__y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x3 __x, simd_float2x3 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float2x4 __x, simd_float2x4 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x2 __x, simd_float3x2 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x3 __x, simd_float3x3 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float3x4 __x, simd_float3x4 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x2 __x, simd_float4x2 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x3 __x, simd_float4x3 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_float4x4 __x, simd_float4x4 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x2 __x, simd_double2x2 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x3 __x, simd_double2x3 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double2x4 __x, simd_double2x4 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x2 __x, simd_double3x2 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) 
& + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x3 __x, simd_double3x3 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double3x4 __x, simd_double3x4 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x2 __x, simd_double4x2 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x3 __x, simd_double4x3 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol)); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements(simd_double4x4 __x, simd_double4x4 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol)); +} + +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x2 __x, simd_float2x2 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x3 __x, simd_float2x3 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float2x4 __x, simd_float2x4 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x2 __x, simd_float3x2 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float3x3 __x, simd_float3x3 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2]))); +} +static simd_bool SIMD_CFUNC 
simd_almost_equal_elements_relative(simd_float3x4 __x, simd_float3x4 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x2 __x, simd_float4x2 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x3 __x, simd_float4x3 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_float4x4 __x, simd_float4x4 __y, float __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x2 __x, simd_double2x2 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x3 __x, simd_double2x3 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double2x4 __x, simd_double2x4 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x2 __x, simd_double3x2 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double3x3 __x, simd_double3x3 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2]))); +} +static simd_bool SIMD_CFUNC 
simd_almost_equal_elements_relative(simd_double3x4 __x, simd_double3x4 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x2 __x, simd_double4x2 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x3 __x, simd_double4x3 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3]))); +} +static simd_bool SIMD_CFUNC simd_almost_equal_elements_relative(simd_double4x4 __x, simd_double4x4 __y, double __tol) { + return simd_all((__tg_fabs(__x.columns[0] - __y.columns[0]) <= __tol*__tg_fabs(__x.columns[0])) & + (__tg_fabs(__x.columns[1] - __y.columns[1]) <= __tol*__tg_fabs(__x.columns[1])) & + (__tg_fabs(__x.columns[2] - __y.columns[2]) <= __tol*__tg_fabs(__x.columns[2])) & + (__tg_fabs(__x.columns[3] - __y.columns[3]) <= __tol*__tg_fabs(__x.columns[3]))); +} + +#ifdef __cplusplus +} +#endif +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* __SIMD_HEADER__ */ diff --git a/vfsoverlay/matrix_types.h b/vfsoverlay/matrix_types.h new file mode 100644 index 00000000..69f821ee --- /dev/null +++ b/vfsoverlay/matrix_types.h @@ -0,0 +1,525 @@ +/* Copyright (c) 2014-2017 Apple, Inc. All rights reserved. + * + * This header defines nine matrix types for each of float and double, which + * are intended for use together with the vector types defined in + * . + * + * For compatibility with common graphics libraries, these matrices are stored + * in column-major order, and implemented as arrays of column vectors. + * Column-major storage order may seem a little strange if you aren't used to + * it, but for most usage the memory layout of the matrices shouldn't matter + * at all; instead you should think of matrices as abstract mathematical + * objects that you use to perform arithmetic without worrying about the + * details of the underlying representation. + * + * WARNING: vectors of length three are internally represented as length four + * vectors with one element of padding (for alignment purposes). This means + * that when a floatNx3 or doubleNx3 is viewed as a vector, it appears to + * have 4*N elements instead of the expected 3*N (with one padding element + * at the end of each column). The matrix elements are laid out in memory + * as follows: + * + * { 0, 1, 2, x, 3, 4, 5, x, ... } + * + * (where the scalar indices used above indicate the conceptual column- + * major storage order). If you aren't monkeying around with the internal + * storage details of matrices, you don't need to worry about this at all. + * Consider this yet another good reason to avoid doing so. 
+
+#ifndef SIMD_MATRIX_TYPES_HEADER
+#define SIMD_MATRIX_TYPES_HEADER
+
+#include <simd/base.h>
+#include <simd/vector_types.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+
+/* Matrix types available in C, Objective-C, and C++ */
+typedef simd_float2x2 matrix_float2x2;
+typedef simd_float3x2 matrix_float3x2;
+typedef simd_float4x2 matrix_float4x2;
+
+typedef simd_float2x3 matrix_float2x3;
+typedef simd_float3x3 matrix_float3x3;
+typedef simd_float4x3 matrix_float4x3;
+
+typedef simd_float2x4 matrix_float2x4;
+typedef simd_float3x4 matrix_float3x4;
+typedef simd_float4x4 matrix_float4x4;
+
+typedef simd_double2x2 matrix_double2x2;
+typedef simd_double3x2 matrix_double3x2;
+typedef simd_double4x2 matrix_double4x2;
+
+typedef simd_double2x3 matrix_double2x3;
+typedef simd_double3x3 matrix_double3x3;
+typedef simd_double4x3 matrix_double4x3;
+
+typedef simd_double2x4 matrix_double2x4;
+typedef simd_double3x4 matrix_double3x4;
+typedef simd_double4x4 matrix_double4x4;
+
+#ifdef __cplusplus
+#if defined SIMD_MATRIX_HEADER
+static simd_float3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatf q);
+static simd_float4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatf q);
+static simd_double3x3 SIMD_NOINLINE simd_matrix3x3(simd_quatd q);
+static simd_double4x4 SIMD_NOINLINE simd_matrix4x4(simd_quatd q);
+#endif
+
+namespace simd {
+
+  struct float2x2 : ::simd_float2x2 {
+    SIMD_CONSTEXPR float2x2() SIMD_NOEXCEPT : ::simd_float2x2((simd_float2x2){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR float2x2(float diagonal) SIMD_NOEXCEPT : float2x2((float2)diagonal) { }
+#endif
+    SIMD_CONSTEXPR float2x2(float2 v) SIMD_NOEXCEPT :
+      ::simd_float2x2((simd_float2x2){(float2){v.x,0}, (float2){0,v.y}}) { }
+    SIMD_CONSTEXPR float2x2(float2 c0, float2 c1) SIMD_NOEXCEPT : simd_float2x2((simd_float2x2){c0, c1}) { }
+    SIMD_CONSTEXPR float2x2(::simd_float2x2 m) SIMD_NOEXCEPT : ::simd_float2x2(m) { }
+  };
+
+  struct float3x2 : ::simd_float3x2 {
+    SIMD_CONSTEXPR float3x2() SIMD_NOEXCEPT : ::simd_float3x2((simd_float3x2){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR float3x2(float diagonal) SIMD_NOEXCEPT : float3x2((float2)diagonal) { }
+#endif
+    SIMD_CONSTEXPR float3x2(float2 v) SIMD_NOEXCEPT :
+      ::simd_float3x2((simd_float3x2){(float2){v.x,0}, (float2){0,v.y}, (float2){0}}) { }
+    SIMD_CONSTEXPR float3x2(float2 c0, float2 c1, float2 c2) SIMD_NOEXCEPT :
+      ::simd_float3x2((simd_float3x2){c0, c1, c2}) { }
+    SIMD_CONSTEXPR float3x2(::simd_float3x2 m) SIMD_NOEXCEPT : ::simd_float3x2(m) { }
+  };
+
+  struct float4x2 : ::simd_float4x2 {
+    SIMD_CONSTEXPR float4x2() SIMD_NOEXCEPT : ::simd_float4x2((simd_float4x2){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR float4x2(float diagonal) SIMD_NOEXCEPT : float4x2((float2)diagonal) { }
+#endif
+    SIMD_CONSTEXPR float4x2(float2 v) SIMD_NOEXCEPT :
+      ::simd_float4x2((simd_float4x2){(float2){v.x,0}, (float2){0,v.y}, (float2){0}, (float2){0}}) { }
+    SIMD_CONSTEXPR float4x2(float2 c0, float2 c1, float2 c2, float2 c3) SIMD_NOEXCEPT :
+      ::simd_float4x2((simd_float4x2){c0, c1, c2, c3}) { }
+    SIMD_CONSTEXPR float4x2(::simd_float4x2 m) SIMD_NOEXCEPT : ::simd_float4x2(m) { }
+  };
+
+  struct float2x3 : ::simd_float2x3 {
+    SIMD_CONSTEXPR float2x3() SIMD_NOEXCEPT : ::simd_float2x3((simd_float2x3){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR float2x3(float diagonal) SIMD_NOEXCEPT : float2x3((float2)diagonal) { }
+#endif
+    SIMD_CONSTEXPR float2x3(float2 v) SIMD_NOEXCEPT :
+      ::simd_float2x3((simd_float2x3){(float3){v.x,0,0}, (float3){0,v.y,0}}) { }
+    
SIMD_CONSTEXPR float2x3(float3 c0, float3 c1) SIMD_NOEXCEPT : ::simd_float2x3((simd_float2x3){c0, c1}) { } + SIMD_CONSTEXPR float2x3(::simd_float2x3 m) SIMD_NOEXCEPT : ::simd_float2x3(m) { } + }; + + struct float3x3 : ::simd_float3x3 { + SIMD_CONSTEXPR float3x3() SIMD_NOEXCEPT : ::simd_float3x3((simd_float3x3){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR float3x3(float diagonal) SIMD_NOEXCEPT : float3x3((float3)diagonal) { } +#endif + SIMD_CONSTEXPR float3x3(float3 v) SIMD_NOEXCEPT : + ::simd_float3x3((simd_float3x3){(float3){v.x,0,0}, (float3){0,v.y,0}, (float3){0,0,v.z}}) { } + SIMD_CONSTEXPR float3x3(float3 c0, float3 c1, float3 c2) SIMD_NOEXCEPT : + ::simd_float3x3((simd_float3x3){c0, c1, c2}) { } + SIMD_CONSTEXPR float3x3(::simd_float3x3 m) SIMD_NOEXCEPT : ::simd_float3x3(m) { } +#if defined SIMD_MATRIX_HEADER + SIMD_CONSTEXPR float3x3(::simd_quatf q) SIMD_NOEXCEPT : ::simd_float3x3(::simd_matrix3x3(q)) { } +#endif + }; + + struct float4x3 : ::simd_float4x3 { + SIMD_CONSTEXPR float4x3() SIMD_NOEXCEPT : ::simd_float4x3((simd_float4x3){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR float4x3(float diagonal) SIMD_NOEXCEPT : float4x3((float3)diagonal) { } +#endif + SIMD_CONSTEXPR float4x3(float3 v) SIMD_NOEXCEPT : + ::simd_float4x3((simd_float4x3){(float3){v.x,0,0}, (float3){0,v.y,0}, (float3){0,0,v.z}, (float3){0}}) { } + SIMD_CONSTEXPR float4x3(float3 c0, float3 c1, float3 c2, float3 c3) SIMD_NOEXCEPT : + ::simd_float4x3((simd_float4x3){c0, c1, c2, c3}) { } + SIMD_CONSTEXPR float4x3(::simd_float4x3 m) SIMD_NOEXCEPT : ::simd_float4x3(m) { } + }; + + struct float2x4 : ::simd_float2x4 { + SIMD_CONSTEXPR float2x4() SIMD_NOEXCEPT : ::simd_float2x4((simd_float2x4){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR float2x4(float diagonal) SIMD_NOEXCEPT : float2x4((float2)diagonal) { } +#endif + SIMD_CONSTEXPR float2x4(float2 v) SIMD_NOEXCEPT : + ::simd_float2x4((simd_float2x4){(float4){v.x,0,0,0}, (float4){0,v.y,0,0}}) { } + SIMD_CONSTEXPR float2x4(float4 c0, float4 c1) SIMD_NOEXCEPT : ::simd_float2x4((simd_float2x4){c0, c1}) { } + SIMD_CONSTEXPR float2x4(::simd_float2x4 m) SIMD_NOEXCEPT : ::simd_float2x4(m) { } + }; + + struct float3x4 : ::simd_float3x4 { + SIMD_CONSTEXPR float3x4() SIMD_NOEXCEPT : ::simd_float3x4((simd_float3x4){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR float3x4(float diagonal) SIMD_NOEXCEPT : float3x4((float3)diagonal) { } +#endif + SIMD_CONSTEXPR float3x4(float3 v) SIMD_NOEXCEPT : + ::simd_float3x4((simd_float3x4){(float4){v.x,0,0,0}, (float4){0,v.y,0,0}, (float4){0,0,v.z,0}}) { } + SIMD_CONSTEXPR float3x4(float4 c0, float4 c1, float4 c2) SIMD_NOEXCEPT : + ::simd_float3x4((simd_float3x4){c0, c1, c2}) { } + SIMD_CONSTEXPR float3x4(::simd_float3x4 m) SIMD_NOEXCEPT : ::simd_float3x4(m) { } + }; + + struct float4x4 : ::simd_float4x4 { + SIMD_CONSTEXPR float4x4() SIMD_NOEXCEPT : ::simd_float4x4((simd_float4x4){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR float4x4(float diagonal) SIMD_NOEXCEPT : float4x4((float4)diagonal) { } +#endif + SIMD_CONSTEXPR float4x4(float4 v) SIMD_NOEXCEPT : + ::simd_float4x4((simd_float4x4){(float4){v.x,0,0,0}, (float4){0,v.y,0,0}, (float4){0,0,v.z,0}, (float4){0,0,0,v.w}}) { } + SIMD_CONSTEXPR float4x4(float4 c0, float4 c1, float4 c2, float4 c3) SIMD_NOEXCEPT : + ::simd_float4x4((simd_float4x4){c0, c1, c2, c3}) { } + SIMD_CONSTEXPR float4x4(::simd_float4x4 m) SIMD_NOEXCEPT : ::simd_float4x4(m) { } +#if defined 
SIMD_MATRIX_HEADER + SIMD_CONSTEXPR float4x4(::simd_quatf q) SIMD_NOEXCEPT : ::simd_float4x4(::simd_matrix4x4(q)) { } +#endif + }; + + struct double2x2 : ::simd_double2x2 { + SIMD_CONSTEXPR double2x2() SIMD_NOEXCEPT : ::simd_double2x2((simd_double2x2){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR double2x2(double diagonal) SIMD_NOEXCEPT : double2x2((double2)diagonal) { } +#endif + SIMD_CONSTEXPR double2x2(double2 v) SIMD_NOEXCEPT : + ::simd_double2x2((simd_double2x2){(double2){v.x,0}, (double2){0,v.y}}) { } + SIMD_CONSTEXPR double2x2(double2 c0, double2 c1) SIMD_NOEXCEPT : + ::simd_double2x2((simd_double2x2){c0, c1}) { } + SIMD_CONSTEXPR double2x2(::simd_double2x2 m) SIMD_NOEXCEPT : ::simd_double2x2(m) { } + }; + + struct double3x2 : ::simd_double3x2 { + SIMD_CONSTEXPR double3x2() SIMD_NOEXCEPT : ::simd_double3x2((simd_double3x2){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR double3x2(double diagonal) SIMD_NOEXCEPT : double3x2((double2)diagonal) { } +#endif + SIMD_CONSTEXPR double3x2(double2 v) SIMD_NOEXCEPT : + ::simd_double3x2((simd_double3x2){(double2){v.x,0}, (double2){0,v.y}, (double2){0}}) { } + SIMD_CONSTEXPR double3x2(double2 c0, double2 c1, double2 c2) SIMD_NOEXCEPT : + ::simd_double3x2((simd_double3x2){c0, c1, c2}) { } + SIMD_CONSTEXPR double3x2(::simd_double3x2 m) SIMD_NOEXCEPT : ::simd_double3x2(m) { } + }; + + struct double4x2 : ::simd_double4x2 { + SIMD_CONSTEXPR double4x2() SIMD_NOEXCEPT : ::simd_double4x2((simd_double4x2){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR double4x2(double diagonal) SIMD_NOEXCEPT : double4x2((double2)diagonal) { } +#endif + SIMD_CONSTEXPR double4x2(double2 v) SIMD_NOEXCEPT : + ::simd_double4x2((simd_double4x2){(double2){v.x,0}, (double2){0,v.y}, (double2){0}, (double2){0}}) { } + SIMD_CONSTEXPR double4x2(double2 c0, double2 c1, double2 c2, double2 c3) SIMD_NOEXCEPT : + ::simd_double4x2((simd_double4x2){c0, c1, c2, c3}) { } + SIMD_CONSTEXPR double4x2(::simd_double4x2 m) SIMD_NOEXCEPT : ::simd_double4x2(m) { } + }; + + struct double2x3 : ::simd_double2x3 { + SIMD_CONSTEXPR double2x3() SIMD_NOEXCEPT : ::simd_double2x3((simd_double2x3){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR double2x3(double diagonal) SIMD_NOEXCEPT : double2x3((double2)diagonal) { } +#endif + SIMD_CONSTEXPR double2x3(double2 v) SIMD_NOEXCEPT : + ::simd_double2x3((simd_double2x3){(double3){v.x,0,0}, (double3){0,v.y,0}}) { } + SIMD_CONSTEXPR double2x3(double3 c0, double3 c1) SIMD_NOEXCEPT : + ::simd_double2x3((simd_double2x3){c0, c1}) { } + SIMD_CONSTEXPR double2x3(::simd_double2x3 m) SIMD_NOEXCEPT : ::simd_double2x3(m) { } + }; + + struct double3x3 : ::simd_double3x3 { + SIMD_CONSTEXPR double3x3() SIMD_NOEXCEPT : ::simd_double3x3((simd_double3x3){0}) { } +#if __has_feature(cxx_delegating_constructors) + SIMD_CONSTEXPR double3x3(double diagonal) SIMD_NOEXCEPT : double3x3((double3)diagonal) { } +#endif + SIMD_CONSTEXPR double3x3(double3 v) SIMD_NOEXCEPT : + ::simd_double3x3((simd_double3x3){(double3){v.x,0,0}, (double3){0,v.y,0}, (double3){0,0,v.z}}) { } + SIMD_CONSTEXPR double3x3(double3 c0, double3 c1, double3 c2) SIMD_NOEXCEPT : + ::simd_double3x3((simd_double3x3){c0, c1, c2}) { } + SIMD_CONSTEXPR double3x3(::simd_double3x3 m) SIMD_NOEXCEPT : ::simd_double3x3(m) { } +#if defined SIMD_MATRIX_HEADER + SIMD_CONSTEXPR double3x3(::simd_quatd q) SIMD_NOEXCEPT : ::simd_double3x3(::simd_matrix3x3(q)) { } +#endif + }; + + struct double4x3 : ::simd_double4x3 { + SIMD_CONSTEXPR 
double4x3() SIMD_NOEXCEPT : ::simd_double4x3((simd_double4x3){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR double4x3(double diagonal) SIMD_NOEXCEPT : double4x3((double3)diagonal) { }
+#endif
+    SIMD_CONSTEXPR double4x3(double3 v) SIMD_NOEXCEPT :
+      ::simd_double4x3((simd_double4x3){(double3){v.x,0,0}, (double3){0,v.y,0}, (double3){0,0,v.z}, (double3){0}}) { }
+    SIMD_CONSTEXPR double4x3(double3 c0, double3 c1, double3 c2, double3 c3) SIMD_NOEXCEPT :
+      ::simd_double4x3((simd_double4x3){c0, c1, c2, c3}) { }
+    SIMD_CONSTEXPR double4x3(::simd_double4x3 m) SIMD_NOEXCEPT : ::simd_double4x3(m) { }
+  };
+
+  struct double2x4 : ::simd_double2x4 {
+    SIMD_CONSTEXPR double2x4() SIMD_NOEXCEPT : ::simd_double2x4((simd_double2x4){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR double2x4(double diagonal) SIMD_NOEXCEPT : double2x4((double2)diagonal) { }
+#endif
+    SIMD_CONSTEXPR double2x4(double2 v) SIMD_NOEXCEPT :
+      ::simd_double2x4((simd_double2x4){(double4){v.x,0,0,0}, (double4){0,v.y,0,0}}) { }
+    SIMD_CONSTEXPR double2x4(double4 c0, double4 c1) SIMD_NOEXCEPT : ::simd_double2x4((simd_double2x4){c0, c1}) { }
+    SIMD_CONSTEXPR double2x4(::simd_double2x4 m) SIMD_NOEXCEPT : ::simd_double2x4(m) { }
+  };
+
+  struct double3x4 : ::simd_double3x4 {
+    SIMD_CONSTEXPR double3x4() SIMD_NOEXCEPT : ::simd_double3x4((simd_double3x4){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR double3x4(double diagonal) SIMD_NOEXCEPT : double3x4((double3)diagonal) { }
+#endif
+    SIMD_CONSTEXPR double3x4(double3 v) SIMD_NOEXCEPT :
+      ::simd_double3x4((simd_double3x4){(double4){v.x,0,0,0}, (double4){0,v.y,0,0}, (double4){0,0,v.z,0}}) { }
+    SIMD_CONSTEXPR double3x4(double4 c0, double4 c1, double4 c2) SIMD_NOEXCEPT :
+      ::simd_double3x4((simd_double3x4){c0, c1, c2}) { }
+    SIMD_CONSTEXPR double3x4(::simd_double3x4 m) SIMD_NOEXCEPT : ::simd_double3x4(m) { }
+  };
+
+  struct double4x4 : ::simd_double4x4 {
+    SIMD_CONSTEXPR double4x4() SIMD_NOEXCEPT : ::simd_double4x4((simd_double4x4){0}) { }
+#if __has_feature(cxx_delegating_constructors)
+    SIMD_CONSTEXPR double4x4(double diagonal) SIMD_NOEXCEPT : double4x4((double4)diagonal) { }
+#endif
+    SIMD_CONSTEXPR double4x4(double4 v) SIMD_NOEXCEPT :
+      ::simd_double4x4((simd_double4x4){(double4){v.x,0,0,0}, (double4){0,v.y,0,0}, (double4){0,0,v.z,0}, (double4){0,0,0,v.w}}) { }
+    SIMD_CONSTEXPR double4x4(double4 c0, double4 c1, double4 c2, double4 c3) SIMD_NOEXCEPT :
+      ::simd_double4x4((simd_double4x4){c0, c1, c2, c3}) { }
+    SIMD_CONSTEXPR double4x4(::simd_double4x4 m) SIMD_NOEXCEPT : ::simd_double4x4(m) { }
+#if defined SIMD_MATRIX_HEADER
+    SIMD_CONSTEXPR double4x4(::simd_quatd q) SIMD_NOEXCEPT : ::simd_double4x4(::simd_matrix4x4(q)) { }
+#endif
+  };
+
+/*! @abstract Templated Matrix struct based on scalar type and number of columns and rows. */
+template <typename ScalarType, size_t col, size_t row> struct Matrix {
+  // static const size_t col
+  // static const size_t row
+  // typedef scalar_t
+  // typedef type
+};
+/*! @abstract Helper type to access the simd type easily. */
+template <typename ScalarType, size_t col, size_t row>
+using Matrix_t = typename Matrix<ScalarType, col, row>::type;
+
+template<> struct Matrix<float, 2, 2> {
+  static const size_t col = 2;
+  static const size_t row = 2;
+  typedef float scalar_t;
+  typedef float2x2 type;
+};
+
+template<> struct Matrix<float, 3, 2> {
+  static const size_t col = 3;
+  static const size_t row = 2;
+  typedef float scalar_t;
+  typedef float3x2 type;
+};
+
+template<> struct Matrix<float, 4, 2> {
+  static const size_t col = 4;
+  static const size_t row = 2;
+  typedef float scalar_t;
+  typedef float4x2 type;
+};
+
+template<> struct Matrix<float, 2, 3> {
+  static const size_t col = 2;
+  static const size_t row = 3;
+  typedef float scalar_t;
+  typedef float2x3 type;
+};
+
+template<> struct Matrix<float, 3, 3> {
+  static const size_t col = 3;
+  static const size_t row = 3;
+  typedef float scalar_t;
+  typedef float3x3 type;
+};
+
+template<> struct Matrix<float, 4, 3> {
+  static const size_t col = 4;
+  static const size_t row = 3;
+  typedef float scalar_t;
+  typedef float4x3 type;
+};
+
+template<> struct Matrix<float, 2, 4> {
+  static const size_t col = 2;
+  static const size_t row = 4;
+  typedef float scalar_t;
+  typedef float2x4 type;
+};
+
+template<> struct Matrix<float, 3, 4> {
+  static const size_t col = 3;
+  static const size_t row = 4;
+  typedef float scalar_t;
+  typedef float3x4 type;
+};
+
+template<> struct Matrix<float, 4, 4> {
+  static const size_t col = 4;
+  static const size_t row = 4;
+  typedef float scalar_t;
+  typedef float4x4 type;
+};
+
+template<> struct Matrix<double, 2, 2> {
+  static const size_t col = 2;
+  static const size_t row = 2;
+  typedef double scalar_t;
+  typedef double2x2 type;
+};
+
+template<> struct Matrix<double, 3, 2> {
+  static const size_t col = 3;
+  static const size_t row = 2;
+  typedef double scalar_t;
+  typedef double3x2 type;
+};
+
+template<> struct Matrix<double, 4, 2> {
+  static const size_t col = 4;
+  static const size_t row = 2;
+  typedef double scalar_t;
+  typedef double4x2 type;
+};
+
+template<> struct Matrix<double, 2, 3> {
+  static const size_t col = 2;
+  static const size_t row = 3;
+  typedef double scalar_t;
+  typedef double2x3 type;
+};
+
+template<> struct Matrix<double, 3, 3> {
+  static const size_t col = 3;
+  static const size_t row = 3;
+  typedef double scalar_t;
+  typedef double3x3 type;
+};
+
+template<> struct Matrix<double, 4, 3> {
+  static const size_t col = 4;
+  static const size_t row = 3;
+  typedef double scalar_t;
+  typedef double4x3 type;
+};
+
+template<> struct Matrix<double, 2, 4> {
+  static const size_t col = 2;
+  static const size_t row = 4;
+  typedef double scalar_t;
+  typedef double2x4 type;
+};
+
+template<> struct Matrix<double, 3, 4> {
+  static const size_t col = 3;
+  static const size_t row = 4;
+  typedef double scalar_t;
+  typedef double3x4 type;
+};
+
+template<> struct Matrix<double, 4, 4> {
+  static const size_t col = 4;
+  static const size_t row = 4;
+  typedef double scalar_t;
+  typedef double4x4 type;
+};
+
+template <> struct get_traits<float2x2>
+{
+  using type = Matrix<float, 2, 2>;
+};
+
+template <> struct get_traits<float3x2>
+{
+  using type = Matrix<float, 3, 2>;
+};
+
+template <> struct get_traits<float4x2>
+{
+  using type = Matrix<float, 4, 2>;
+};
+
+template <> struct get_traits<float2x3>
+{
+  using type = Matrix<float, 2, 3>;
+};
+
+template <> struct get_traits<float3x3>
+{
+  using type = Matrix<float, 3, 3>;
+};
+
+template <> struct get_traits<float4x3>
+{
+  using type = Matrix<float, 4, 3>;
+};
+
+template <> struct get_traits<float2x4>
+{
+  using type = Matrix<float, 2, 4>;
+};
+
+template <> struct get_traits<float3x4>
+{
+  using type = Matrix<float, 3, 4>;
+};
+
+template <> struct get_traits<float4x4>
+{
+  using type = Matrix<float, 4, 4>;
+};
+
+template <> struct get_traits<double2x2>
+{
+  using type = Matrix<double, 2, 2>;
+};
+
+template <> struct get_traits<double3x2>
+{
+  using type = Matrix<double, 3, 2>;
+};
+
+template <> struct get_traits<double4x2>
+{
+  using type = Matrix<double, 4, 2>;
+};
+
+template <> struct get_traits<double2x3>
+{
+  using type = Matrix<double, 2, 3>;
+};
+
+template <> struct get_traits<double3x3>
+{
+  using type = Matrix<double, 3, 3>;
+};
+
+template <> struct get_traits<double4x3>
+{
+  using type = Matrix<double, 4, 3>;
+};
+
+template <> struct get_traits<double2x4>
+{
+  using type = Matrix<double, 2, 4>;
+};
+
+template <> struct get_traits<double3x4>
+{
+  using type = Matrix<double, 3, 4>;
+};
+
+template <> struct get_traits<double4x4>
+{
+  using type = Matrix<double, 4, 4>;
+};
+
+}
+#endif /* __cplusplus */
+#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif /* SIMD_MATRIX_TYPES_HEADER */
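A short sketch of how the C++ interface above is meant to be used (illustrative only, not part of the patch; the angle-bracketed parameters of Matrix, Matrix_t, and get_traits were reconstructed here from the member definitions, so the static_assert encodes that assumption):

// traits_demo.cpp -- illustrative sketch
#include <simd/simd.h>
#include <type_traits>

int main() {
    simd::float3x3 eye(1.0f);                   // diagonal constructor: identity
    simd::float3x3 cols(simd_make_float3(1, 0, 0),
                        simd_make_float3(0, 1, 0),
                        simd_make_float3(0, 0, 1));

    // Matrix_t maps (scalar type, columns, rows) back to a concrete simd type.
    static_assert(std::is_same<simd::Matrix_t<float, 3, 3>, simd::float3x3>::value,
                  "traits round-trip");
    return simd_equal(eye, cols) ? 0 : 1;       // both are the identity
}

The overlay.yaml that follows maps these copied headers over the corresponding paths inside the iPhoneSimulator SDK; a virtual-file-system overlay of this shape is what clang consumes through its -ivfsoverlay option.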
diff --git a/vfsoverlay/overlay.yaml b/vfsoverlay/overlay.yaml
new file mode 100644
index 00000000..a4560827
--- /dev/null
+++ b/vfsoverlay/overlay.yaml
@@ -0,0 +1,28 @@
+{
+  'case-sensitive': 'false',
+  'roots': [
+    {
+      "contents": [
+        { 'external-contents': "../vfsoverlay/packed.h", 'name': "simd/packed.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/types.h", 'name': "simd/types.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/quaternion.h", 'name': "simd/quaternion.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/matrix_types.h", 'name': "simd/matrix_types.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/matrix.h", 'name': "simd/matrix.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/conversion.h", 'name': "simd/conversion.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/vector_make.h", 'name': "simd/vector_make.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/common.h", 'name': "simd/common.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/logic.h", 'name': "simd/logic.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/simd.h", 'name': "simd/simd.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/vector_types.h", 'name': "simd/vector_types.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/math.h", 'name': "simd/math.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/extern.h", 'name': "simd/extern.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/vector.h", 'name': "simd/vector.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/geometry.h", 'name': "simd/geometry.h", 'type': 'file' },
+        { 'external-contents': "../vfsoverlay/base.h", 'name': "simd/base.h", 'type': 'file' }
+      ],
+      'name': "/Applications/Xcode.app/Contents/Developer/Platforms/iPhoneSimulator.platform/Developer/SDKs/iPhoneSimulator18.0.sdk/usr/include",
+      'type': 'directory'
+    },
+  ],
+  'version': 0,
+}
diff --git a/vfsoverlay/packed.h b/vfsoverlay/packed.h
new file mode 100644
index 00000000..284947c3
--- /dev/null
+++ b/vfsoverlay/packed.h
@@ -0,0 +1,1031 @@
+/*! @header
+ *  This header defines fixed size vector types with relaxed alignment. For
+ *  each vector type defined by <simd/vector_types.h> that is not a 1- or 3-
+ *  element vector, there is a corresponding type defined by this header that
+ *  requires only the alignment matching that of the underlying scalar type.
+ *
+ *  These types should be used to access buffers that may not be sufficiently
+ *  aligned to allow them to be accessed using the "normal" simd vector types.
+ *  As an example of this usage, suppose that you want to load a vector of
+ *  four floats from an array of floats. The type simd_float4 has sixteen byte
+ *  alignment, whereas an array of floats has only four byte alignment.
+ *  Thus, naively casting a pointer into the array to (simd_float4 *) would
+ *  invoke undefined behavior, and likely produce an alignment fault at
+ *  runtime. Instead, use the corresponding packed type to load from the array:
+ *
+ *  <pre>
+ *  @textblock
+ *  simd_float4 vector = *(simd_packed_float4 *)&array[i];
+ *  // do something with vector ...
+ *  @/textblock
+ *  </pre>
+ *
+ *  It's important to note that the packed_ types are only needed to work with
+ *  memory; once the data is loaded, we simply operate on it as usual using
+ *  the simd_float4 type, as illustrated above.
+ *
+ *  @copyright 2014-2017 Apple, Inc. All rights reserved.
+ *  @unsorted */
+
+#ifndef SIMD_PACKED_TYPES
+#define SIMD_PACKED_TYPES
+
+# include <simd/base.h>
+# if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+/*! @abstract A vector of two 8-bit signed (twos-complement) integers with
+ *  relaxed alignment.
+ *  @description In C++ and Metal, this type is also available as
+ *  simd::packed::char2. The alignment of this type is that of the
+ *  underlying scalar element type, so you can use it to load or store from
+ *  an array of that type. */
+typedef __attribute__((__ext_vector_type__(2),__aligned__(1))) char simd_packed_char2;
+
+/*! @abstract A vector of four 8-bit signed (twos-complement) integers with
+ *  relaxed alignment.
+ *  @description In C++ and Metal, this type is also available as
+ *  simd::packed::char4. The alignment of this type is that of the
+ *  underlying scalar element type, so you can use it to load or store from
+ *  an array of that type. */
+typedef __attribute__((__ext_vector_type__(4),__aligned__(1))) char simd_packed_char4;
+
+/*! @abstract A vector of eight 8-bit signed (twos-complement) integers with
+ *  relaxed alignment.
+ *  @description In C++ this type is also available as simd::packed::char8.
+ *  This type is not available in Metal. The alignment of this type is only
+ *  that of the underlying scalar element type, so you can use it to load or
+ *  store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(8),__aligned__(1))) char simd_packed_char8;
+
+/*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers
+ *  with relaxed alignment.
+ *  @description In C++ this type is also available as simd::packed::char16.
+ *  This type is not available in Metal. The alignment of this type is only
+ *  that of the underlying scalar element type, so you can use it to load or
+ *  store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(16),__aligned__(1))) char simd_packed_char16;
+
+/*! @abstract A vector of thirty-two 8-bit signed (twos-complement) integers
+ *  with relaxed alignment.
+ *  @description In C++ this type is also available as simd::packed::char32.
+ *  This type is not available in Metal. The alignment of this type is only
+ *  that of the underlying scalar element type, so you can use it to load or
+ *  store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(32),__aligned__(1))) char simd_packed_char32;
+
+/*! @abstract A vector of sixty-four 8-bit signed (twos-complement) integers
+ *  with relaxed alignment.
+ *  @description In C++ this type is also available as simd::packed::char64.
+ *  This type is not available in Metal. The alignment of this type is only
+ *  that of the underlying scalar element type, so you can use it to load or
+ *  store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(64),__aligned__(1))) char simd_packed_char64;
+
+/*! @abstract A vector of two 8-bit unsigned integers with relaxed
+ *  alignment.
+ *  @description In C++ and Metal, this type is also available as
+ *  simd::packed::uchar2. The alignment of this type is that of the
+ *  underlying scalar element type, so you can use it to load or store from
+ *  an array of that type. */
+typedef __attribute__((__ext_vector_type__(2),__aligned__(1))) unsigned char simd_packed_uchar2;
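A minimal sketch of that load/store pattern as real functions (illustrative only, not part of the header; load_float4 and store_float4 are hypothetical names):

// packed_demo.cpp -- illustrative sketch
#include <simd/simd.h>

// 'p' may be only 4-byte aligned, as any array of float is allowed to be.
simd_float4 load_float4(const float *p) {
    // (const simd_float4 *)p would require 16-byte alignment; the packed
    // type demands only the scalar's 4-byte alignment, so this is safe.
    simd_float4 v = *(const simd_packed_float4 *)p;
    return v + 1.0f;   // once loaded, compute with the ordinary type
}

void store_float4(float *p, simd_float4 v) {
    *(simd_packed_float4 *)p = v;   // the same trick works for stores
}

This mirrors the header's own guidance: the packed types exist purely at the load/store boundary, and all arithmetic stays on the naturally aligned types.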
+
+/*! @abstract A vector of four 8-bit unsigned integers with relaxed
+ *  alignment.
+ *  @description In C++ and Metal, this type is also available as
+ *  simd::packed::uchar4. The alignment of this type is that of the
+ *  underlying scalar element type, so you can use it to load or store from
+ *  an array of that type. */
+typedef __attribute__((__ext_vector_type__(4),__aligned__(1))) unsigned char simd_packed_uchar4;
+
+/*! @abstract A vector of eight 8-bit unsigned integers with relaxed
+ *  alignment.
+ *  @description In C++ this type is also available as simd::packed::uchar8.
+ *  This type is not available in Metal. The alignment of this type is only
+ *  that of the underlying scalar element type, so you can use it to load or
+ *  store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(8),__aligned__(1))) unsigned char simd_packed_uchar8;
+
+/*! @abstract A vector of sixteen 8-bit unsigned integers with relaxed
+ *  alignment.
+ *  @description In C++ this type is also available as
+ *  simd::packed::uchar16. This type is not available in Metal. The
+ *  alignment of this type is only that of the underlying scalar element
+ *  type, so you can use it to load or store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(16),__aligned__(1))) unsigned char simd_packed_uchar16;
+
+/*! @abstract A vector of thirty-two 8-bit unsigned integers with relaxed
+ *  alignment.
+ *  @description In C++ this type is also available as
+ *  simd::packed::uchar32. This type is not available in Metal. The
+ *  alignment of this type is only that of the underlying scalar element
+ *  type, so you can use it to load or store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(32),__aligned__(1))) unsigned char simd_packed_uchar32;
+
+/*! @abstract A vector of sixty-four 8-bit unsigned integers with relaxed
+ *  alignment.
+ *  @description In C++ this type is also available as
+ *  simd::packed::uchar64. This type is not available in Metal. The
+ *  alignment of this type is only that of the underlying scalar element
+ *  type, so you can use it to load or store from an array of that type. */
+typedef __attribute__((__ext_vector_type__(64),__aligned__(1))) unsigned char simd_packed_uchar64;
+
+/*! @abstract A vector of two 16-bit signed (twos-complement) integers with
+ *  relaxed alignment.
+ *  @description In C++ and Metal, this type is also available as
+ *  simd::packed::short2. The alignment of this type is that of the
+ *  underlying scalar element type, so you can use it to load or store from
+ *  an array of that type. */
+typedef __attribute__((__ext_vector_type__(2),__aligned__(2))) short simd_packed_short2;
+
+/*! @abstract A vector of four 16-bit signed (twos-complement) integers with
+ *  relaxed alignment.
+ *  @description In C++ and Metal, this type is also available as
+ *  simd::packed::short4. The alignment of this type is that of the
+ *  underlying scalar element type, so you can use it to load or store from
+ *  an array of that type. */
+typedef __attribute__((__ext_vector_type__(4),__aligned__(2))) short simd_packed_short4;
+
+/*! @abstract A vector of eight 16-bit signed (twos-complement) integers
+ *  with relaxed alignment.
+ *  @description In C++ this type is also available as simd::packed::short8.
+ *  This type is not available in Metal. The alignment of this type is only
+ *  that of the underlying scalar element type, so you can use it to load or
+ *  store from an array of that type.
*/ +typedef __attribute__((__ext_vector_type__(8),__aligned__(2))) short simd_packed_short8; + +/*! @abstract A vector of sixteen 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as + * simd::packed::short16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(2))) short simd_packed_short16; + +/*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description In C++ this type is also available as + * simd::packed::short32. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(2))) short simd_packed_short32; + +/*! @abstract A vector of two 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ushort2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(2))) unsigned short simd_packed_ushort2; + +/*! @abstract A vector of four 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ushort4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(2))) unsigned short simd_packed_ushort4; + +/*! @abstract A vector of eight 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::ushort8. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(2))) unsigned short simd_packed_ushort8; + +/*! @abstract A vector of sixteen 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::ushort16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(2))) unsigned short simd_packed_ushort16; + +/*! @abstract A vector of thirty-two 16-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::ushort32. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(2))) unsigned short simd_packed_ushort32; + +/*! @abstract A vector of two 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::int2. 
The alignment of this type is that of the underlying + * scalar element type, so you can use it to load or store from an array of + * that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) int simd_packed_int2; + +/*! @abstract A vector of four 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::int4. The alignment of this type is that of the underlying + * scalar element type, so you can use it to load or store from an array of + * that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) int simd_packed_int4; + +/*! @abstract A vector of eight 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::int8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) int simd_packed_int8; + +/*! @abstract A vector of sixteen 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::int16. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(4))) int simd_packed_int16; + +/*! @abstract A vector of two 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::uint2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) unsigned int simd_packed_uint2; + +/*! @abstract A vector of four 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::uint4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) unsigned int simd_packed_uint4; + +/*! @abstract A vector of eight 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::uint8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) unsigned int simd_packed_uint8; + +/*! @abstract A vector of sixteen 32-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::uint16. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(4))) unsigned int simd_packed_uint16; + +/*! @abstract A vector of two 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::float2. 
The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) float simd_packed_float2; + +/*! @abstract A vector of four 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::float4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) float simd_packed_float4; + +/*! @abstract A vector of eight 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::float8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) float simd_packed_float8; + +/*! @abstract A vector of sixteen 32-bit floating-point numbers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::float16. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(4))) float simd_packed_float16; + +/*! @abstract A vector of two 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::long2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(2),__aligned__(8))) simd_long1 simd_packed_long2; +#else +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) simd_long1 simd_packed_long2; +#endif + +/*! @abstract A vector of four 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::long4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(4),__aligned__(8))) simd_long1 simd_packed_long4; +#else +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) simd_long1 simd_packed_long4; +#endif + +/*! @abstract A vector of eight 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C++ this type is also available as simd::packed::long8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(8),__aligned__(8))) simd_long1 simd_packed_long8; +#else +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) simd_long1 simd_packed_long8; +#endif + +/*! @abstract A vector of two 64-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ulong2. 
The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(2),__aligned__(8))) simd_ulong1 simd_packed_ulong2; +#else +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) simd_ulong1 simd_packed_ulong2; +#endif + +/*! @abstract A vector of four 64-bit unsigned integers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::ulong4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(4),__aligned__(8))) simd_ulong1 simd_packed_ulong4; +#else +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) simd_ulong1 simd_packed_ulong4; +#endif + +/*! @abstract A vector of eight 64-bit unsigned integers with relaxed + * alignment. + * @description In C++ this type is also available as simd::packed::ulong8. + * This type is not available in Metal. The alignment of this type is only + * that of the underlying scalar element type, so you can use it to load or + * store from an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(8),__aligned__(8))) simd_ulong1 simd_packed_ulong8; +#else +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) simd_ulong1 simd_packed_ulong8; +#endif + +/*! @abstract A vector of two 64-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::double2. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(2),__aligned__(8))) double simd_packed_double2; +#else +typedef __attribute__((__ext_vector_type__(2),__aligned__(4))) double simd_packed_double2; +#endif + +/*! @abstract A vector of four 64-bit floating-point numbers with relaxed + * alignment. + * @description In C++ and Metal, this type is also available as + * simd::packed::double4. The alignment of this type is that of the + * underlying scalar element type, so you can use it to load or store from + * an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(4),__aligned__(8))) double simd_packed_double4; +#else +typedef __attribute__((__ext_vector_type__(4),__aligned__(4))) double simd_packed_double4; +#endif + +/*! @abstract A vector of eight 64-bit floating-point numbers with relaxed + * alignment. + * @description In C++ this type is also available as + * simd::packed::double8. This type is not available in Metal. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +#if defined __LP64__ +typedef __attribute__((__ext_vector_type__(8),__aligned__(8))) double simd_packed_double8; +#else +typedef __attribute__((__ext_vector_type__(8),__aligned__(4))) double simd_packed_double8; +#endif + +/* MARK: C++ vector types */ +#if defined __cplusplus +namespace simd { + namespace packed { + /*! @abstract A vector of two 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_char2. 
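+ * (An illustrative note: in C++ the same two-lane vector can be declared
+ * as simd::packed::char2 v = { 1, 2 }; it carries the alignment of char
+ * rather than that of simd_char2.)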
The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_char2 char2; + + /*! @abstract A vector of four 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_char4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_char4 char4; + + /*! @abstract A vector of eight 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char8 char8; + + /*! @abstract A vector of sixteen 8-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char16 char16; + + /*! @abstract A vector of thirty-two 8-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char32 char32; + + /*! @abstract A vector of sixty-four 8-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_char64. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_char64 char64; + + /*! @abstract A vector of two 8-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uchar2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uchar2 uchar2; + + /*! @abstract A vector of four 8-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uchar4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uchar4 uchar4; + + /*! @abstract A vector of eight 8-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar8 uchar8; + + /*! @abstract A vector of sixteen 8-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. 
In C or + * Objective-C, this type is available as simd_packed_uchar16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar16 uchar16; + + /*! @abstract A vector of thirty-two 8-bit unsigned integers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar32 uchar32; + + /*! @abstract A vector of sixty-four 8-bit unsigned integers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uchar64. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uchar64 uchar64; + + /*! @abstract A vector of two 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_short2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_short2 short2; + + /*! @abstract A vector of four 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_short4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_short4 short4; + + /*! @abstract A vector of eight 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_short8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_short8 short8; + + /*! @abstract A vector of sixteen 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_short16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_short16 short16; + + /*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_short32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_short32 short32; + + /*! @abstract A vector of two 16-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ushort2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ushort2 ushort2; + + /*! 
@abstract A vector of four 16-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ushort4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ushort4 ushort4; + + /*! @abstract A vector of eight 16-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ushort8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ushort8 ushort8; + + /*! @abstract A vector of sixteen 16-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ushort16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ushort16 ushort16; + + /*! @abstract A vector of thirty-two 16-bit unsigned integers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ushort32. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ushort32 ushort32; + + /*! @abstract A vector of two 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_int2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_int2 int2; + + /*! @abstract A vector of four 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_int4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_int4 int4; + + /*! @abstract A vector of eight 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_int8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_int8 int8; + + /*! @abstract A vector of sixteen 32-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_int16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_int16 int16; + + /*! @abstract A vector of two 32-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uint2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. 
*/ +typedef ::simd_packed_uint2 uint2; + + /*! @abstract A vector of four 32-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_uint4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_uint4 uint4; + + /*! @abstract A vector of eight 32-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uint8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uint8 uint8; + + /*! @abstract A vector of sixteen 32-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_uint16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_uint16 uint16; + + /*! @abstract A vector of two 32-bit floating-point numbers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_float2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_float2 float2; + + /*! @abstract A vector of four 32-bit floating-point numbers with + * relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_float4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_float4 float4; + + /*! @abstract A vector of eight 32-bit floating-point numbers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_float8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_float8 float8; + + /*! @abstract A vector of sixteen 32-bit floating-point numbers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_float16. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_float16 float16; + + /*! @abstract A vector of two 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_long2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_long2 long2; + + /*! @abstract A vector of four 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_long4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_long4 long4; + + /*! 
@abstract A vector of eight 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_long8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_long8 long8; + + /*! @abstract A vector of two 64-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ulong2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ulong2 ulong2; + + /*! @abstract A vector of four 64-bit unsigned integers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_ulong4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_ulong4 ulong4; + + /*! @abstract A vector of eight 64-bit unsigned integers with relaxed + * alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_ulong8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_ulong8 ulong8; + + /*! @abstract A vector of two 64-bit floating-point numbers with relaxed + * alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_double2. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_double2 double2; + + /*! @abstract A vector of four 64-bit floating-point numbers with + * relaxed alignment. + * @description In C or Objective-C, this type is available as + * simd_packed_double4. The alignment of this type is only that of the + * underlying scalar element type, so you can use it to load or store + * from an array of that type. */ +typedef ::simd_packed_double4 double4; + + /*! @abstract A vector of eight 64-bit floating-point numbers with + * relaxed alignment. + * @description This type is not available in Metal. In C or + * Objective-C, this type is available as simd_packed_double8. The + * alignment of this type is only that of the underlying scalar element + * type, so you can use it to load or store from an array of that type. */ +typedef ::simd_packed_double8 double8; + + } /* namespace simd::packed:: */ +} /* namespace simd:: */ +#endif /* __cplusplus */ + +/* MARK: Deprecated vector types */ +/*! @group Deprecated vector types + * @discussion These are the original types used by earlier versions of the + * simd library; they are provided here for compatibility with existing source + * files. Use the new ("simd_"-prefixed) types for future development. */ +/*! @abstract A vector of two 8-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char2 + * or simd::packed::char2 instead. */ +typedef simd_packed_char2 packed_char2; + +/*! @abstract A vector of four 8-bit signed (twos-complement) integers with + * relaxed alignment. 
+ * @description This type is deprecated; you should use simd_packed_char4 + * or simd::packed::char4 instead. */ +typedef simd_packed_char4 packed_char4; + +/*! @abstract A vector of eight 8-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char8 + * or simd::packed::char8 instead. */ +typedef simd_packed_char8 packed_char8; + +/*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char16 + * or simd::packed::char16 instead. */ +typedef simd_packed_char16 packed_char16; + +/*! @abstract A vector of thirty-two 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char32 + * or simd::packed::char32 instead. */ +typedef simd_packed_char32 packed_char32; + +/*! @abstract A vector of sixty-four 8-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_char64 + * or simd::packed::char64 instead. */ +typedef simd_packed_char64 packed_char64; + +/*! @abstract A vector of two 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar2 + * or simd::packed::uchar2 instead. */ +typedef simd_packed_uchar2 packed_uchar2; + +/*! @abstract A vector of four 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar4 + * or simd::packed::uchar4 instead. */ +typedef simd_packed_uchar4 packed_uchar4; + +/*! @abstract A vector of eight 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar8 + * or simd::packed::uchar8 instead. */ +typedef simd_packed_uchar8 packed_uchar8; + +/*! @abstract A vector of sixteen 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar16 + * or simd::packed::uchar16 instead. */ +typedef simd_packed_uchar16 packed_uchar16; + +/*! @abstract A vector of thirty-two 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar32 + * or simd::packed::uchar32 instead. */ +typedef simd_packed_uchar32 packed_uchar32; + +/*! @abstract A vector of sixty-four 8-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uchar64 + * or simd::packed::uchar64 instead. */ +typedef simd_packed_uchar64 packed_uchar64; + +/*! @abstract A vector of two 16-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short2 + * or simd::packed::short2 instead. */ +typedef simd_packed_short2 packed_short2; + +/*! @abstract A vector of four 16-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short4 + * or simd::packed::short4 instead. */ +typedef simd_packed_short4 packed_short4; + +/*! @abstract A vector of eight 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short8 + * or simd::packed::short8 instead. */ +typedef simd_packed_short8 packed_short8; + +/*! 
@abstract A vector of sixteen 16-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short16 + * or simd::packed::short16 instead. */ +typedef simd_packed_short16 packed_short16; + +/*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_short32 + * or simd::packed::short32 instead. */ +typedef simd_packed_short32 packed_short32; + +/*! @abstract A vector of two 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ushort2 + * or simd::packed::ushort2 instead. */ +typedef simd_packed_ushort2 packed_ushort2; + +/*! @abstract A vector of four 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ushort4 + * or simd::packed::ushort4 instead. */ +typedef simd_packed_ushort4 packed_ushort4; + +/*! @abstract A vector of eight 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ushort8 + * or simd::packed::ushort8 instead. */ +typedef simd_packed_ushort8 packed_ushort8; + +/*! @abstract A vector of sixteen 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use + * simd_packed_ushort16 or simd::packed::ushort16 instead. */ +typedef simd_packed_ushort16 packed_ushort16; + +/*! @abstract A vector of thirty-two 16-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use + * simd_packed_ushort32 or simd::packed::ushort32 instead. */ +typedef simd_packed_ushort32 packed_ushort32; + +/*! @abstract A vector of two 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int2 or + * simd::packed::int2 instead. */ +typedef simd_packed_int2 packed_int2; + +/*! @abstract A vector of four 32-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int4 or + * simd::packed::int4 instead. */ +typedef simd_packed_int4 packed_int4; + +/*! @abstract A vector of eight 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int8 or + * simd::packed::int8 instead. */ +typedef simd_packed_int8 packed_int8; + +/*! @abstract A vector of sixteen 32-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_int16 + * or simd::packed::int16 instead. */ +typedef simd_packed_int16 packed_int16; + +/*! @abstract A vector of two 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint2 + * or simd::packed::uint2 instead. */ +typedef simd_packed_uint2 packed_uint2; + +/*! @abstract A vector of four 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint4 + * or simd::packed::uint4 instead. */ +typedef simd_packed_uint4 packed_uint4; + +/*! @abstract A vector of eight 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint8 + * or simd::packed::uint8 instead. 
*/ +typedef simd_packed_uint8 packed_uint8; + +/*! @abstract A vector of sixteen 32-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_uint16 + * or simd::packed::uint16 instead. */ +typedef simd_packed_uint16 packed_uint16; + +/*! @abstract A vector of two 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float2 + * or simd::packed::float2 instead. */ +typedef simd_packed_float2 packed_float2; + +/*! @abstract A vector of four 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float4 + * or simd::packed::float4 instead. */ +typedef simd_packed_float4 packed_float4; + +/*! @abstract A vector of eight 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float8 + * or simd::packed::float8 instead. */ +typedef simd_packed_float8 packed_float8; + +/*! @abstract A vector of sixteen 32-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_float16 + * or simd::packed::float16 instead. */ +typedef simd_packed_float16 packed_float16; + +/*! @abstract A vector of two 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_long2 + * or simd::packed::long2 instead. */ +typedef simd_packed_long2 packed_long2; + +/*! @abstract A vector of four 64-bit signed (twos-complement) integers with + * relaxed alignment. + * @description This type is deprecated; you should use simd_packed_long4 + * or simd::packed::long4 instead. */ +typedef simd_packed_long4 packed_long4; + +/*! @abstract A vector of eight 64-bit signed (twos-complement) integers + * with relaxed alignment. + * @description This type is deprecated; you should use simd_packed_long8 + * or simd::packed::long8 instead. */ +typedef simd_packed_long8 packed_long8; + +/*! @abstract A vector of two 64-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ulong2 + * or simd::packed::ulong2 instead. */ +typedef simd_packed_ulong2 packed_ulong2; + +/*! @abstract A vector of four 64-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ulong4 + * or simd::packed::ulong4 instead. */ +typedef simd_packed_ulong4 packed_ulong4; + +/*! @abstract A vector of eight 64-bit unsigned integers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_ulong8 + * or simd::packed::ulong8 instead. */ +typedef simd_packed_ulong8 packed_ulong8; + +/*! @abstract A vector of two 64-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_double2 + * or simd::packed::double2 instead. */ +typedef simd_packed_double2 packed_double2; + +/*! @abstract A vector of four 64-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_double4 + * or simd::packed::double4 instead. */ +typedef simd_packed_double4 packed_double4; + +/*! @abstract A vector of eight 64-bit floating-point numbers with relaxed + * alignment. + * @description This type is deprecated; you should use simd_packed_double8 + * or simd::packed::double8 instead. 
*/ +typedef simd_packed_double8 packed_double8; + +# endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif diff --git a/vfsoverlay/quaternion.h b/vfsoverlay/quaternion.h new file mode 100644 index 00000000..bb9211b8 --- /dev/null +++ b/vfsoverlay/quaternion.h @@ -0,0 +1,1194 @@ +/*! @header + * This header defines functions for constructing and using quaternions. + * @copyright 2015-2016 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_QUATERNIONS +#define SIMD_QUATERNIONS + +#include <simd/base.h> +#if SIMD_COMPILER_HAS_REQUIRED_FEATURES +#include <simd/vector.h> +#include <simd/matrix_types.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* MARK: - C and Objective-C float interfaces */ + +/*! @abstract Constructs a quaternion from four scalar values. + * + * @param ix The first component of the imaginary (vector) part. + * @param iy The second component of the imaginary (vector) part. + * @param iz The third component of the imaginary (vector) part. + * + * @param r The real (scalar) part. */ +static inline SIMD_CFUNC simd_quatf simd_quaternion(float ix, float iy, float iz, float r) { + return (simd_quatf){ { ix, iy, iz, r } }; +} + +/*! @abstract Constructs a quaternion from an array of four scalars. + * + * @discussion Note that the imaginary part of the quaternion comes from + * array elements 0, 1, and 2, and the real part comes from element 3. */ +static inline SIMD_NONCONST simd_quatf simd_quaternion(const float xyzr[4]) { + return (simd_quatf){ *(const simd_packed_float4 *)xyzr }; +} + +/*! @abstract Constructs a quaternion from a four-element vector. + * + * @discussion Note that the imaginary (vector) part of the quaternion comes + * from lanes 0, 1, and 2 of the vector, and the real (scalar) part comes from + * lane 3. */ +static inline SIMD_CFUNC simd_quatf simd_quaternion(simd_float4 xyzr) { + return (simd_quatf){ xyzr }; +} + +/*! @abstract Constructs a quaternion that rotates by `angle` radians about + * `axis`. */ +static inline SIMD_CFUNC simd_quatf simd_quaternion(float angle, simd_float3 axis); + +/*! @abstract Construct a quaternion that rotates from one vector to another. + * + * @param from A normalized three-element vector. + * @param to A normalized three-element vector. + * + * @discussion The rotation axis is `simd_cross(from, to)`. If `from` and + * `to` point in opposite directions (to within machine precision), an + * arbitrary rotation axis is chosen, and the angle is pi radians. */ +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3 from, simd_float3 to); + +/*! @abstract Construct a quaternion from a 3x3 rotation `matrix`. + * + * @discussion If `matrix` is not orthogonal with determinant 1, the result + * is undefined. */ +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3x3 matrix); + +/*! @abstract Construct a quaternion from a 4x4 rotation `matrix`. + * + * @discussion The last row and column of the matrix are ignored. This + * function is equivalent to calling simd_quaternion with the upper-left 3x3 + * submatrix. */ +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float4x4 matrix); + +/*! @abstract The real (scalar) part of the quaternion `q`. */ +static inline SIMD_CFUNC float simd_real(simd_quatf q) { + return q.vector.w; +} + +/*! @abstract The imaginary (vector) part of the quaternion `q`. */ +static inline SIMD_CFUNC simd_float3 simd_imag(simd_quatf q) { + return q.vector.xyz; +} + +/*! @abstract The angle (in radians) of rotation represented by `q`. */ +static inline SIMD_CFUNC float simd_angle(simd_quatf q); + +/*! 
@abstract The normalized axis (a 3-element vector) around which the + * action of the quaternion `q` rotates. */ +static inline SIMD_CFUNC simd_float3 simd_axis(simd_quatf q); + +/*! @abstract The sum of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatf simd_add(simd_quatf p, simd_quatf q); + +/*! @abstract The difference of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatf simd_sub(simd_quatf p, simd_quatf q); + +/*! @abstract The product of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf p, simd_quatf q); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf q, float a); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatf simd_mul(float a, simd_quatf q); + +/*! @abstract The conjugate of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatf simd_conjugate(simd_quatf q); + +/*! @abstract The (multiplicative) inverse of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatf simd_inverse(simd_quatf q); + +/*! @abstract The negation (additive inverse) of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatf simd_negate(simd_quatf q); + +/*! @abstract The dot product of the quaternions `p` and `q` interpreted as + * four-dimensional vectors. */ +static inline SIMD_CFUNC float simd_dot(simd_quatf p, simd_quatf q); + +/*! @abstract The length of the quaternion `q`. */ +static inline SIMD_CFUNC float simd_length(simd_quatf q); + +/*! @abstract The unit quaternion obtained by normalizing `q`. */ +static inline SIMD_CFUNC simd_quatf simd_normalize(simd_quatf q); + +/*! @abstract Rotates the vector `v` by the quaternion `q`. */ +static inline SIMD_CFUNC simd_float3 simd_act(simd_quatf q, simd_float3 v); + +/*! @abstract Logarithm of the quaternion `q`. + * @discussion Do not call this function directly; use `log(q)` instead. + * + * We can write a quaternion `q` in the form: `r(cos(t) + sin(t)v)` where + * `r` is the length of `q`, `t` is an angle, and `v` is a unit 3-vector. + * The logarithm of `q` is `log(r) + tv`, just like the logarithm of the + * complex number `r*(cos(t) + i sin(t))` is `log(r) + it`. + * + * Note that this function is not robust against poorly-scaled non-unit + * quaternions, because it is primarily used for spline interpolation of + * unit quaternions. If you need to compute a robust logarithm of general + * quaternions, you can use the following approach: + * + * scale = simd_reduce_max(simd_abs(q.vector)); + * logq = log(simd_recip(scale)*q); + * logq.real += log(scale); + * return logq; */ +static SIMD_NOINLINE simd_quatf __tg_log(simd_quatf q); + +/*! @abstract Inverse of `log( )`; the exponential map on quaternions. + * @discussion Do not call this function directly; use `exp(q)` instead. */ +static SIMD_NOINLINE simd_quatf __tg_exp(simd_quatf q); + +/*! @abstract Spherical linear interpolation along the shortest arc between + * quaternions `q0` and `q1`. */ +static SIMD_NOINLINE simd_quatf simd_slerp(simd_quatf q0, simd_quatf q1, float t); + +/*! @abstract Spherical linear interpolation along the longest arc between + * quaternions `q0` and `q1`. */ +static SIMD_NOINLINE simd_quatf simd_slerp_longest(simd_quatf q0, simd_quatf q1, float t); + +/*! @abstract Interpolate between quaternions along a spherical cubic spline. + * + * @discussion The function interpolates between q1 and q2. 
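+ * (An illustrative sketch: for a sequence of keyframe rotations q[0..n],
+ * the segment between q[i] and q[i+1] would be evaluated as
+ *
+ *     simd_quatf r = simd_spline(q[i-1], q[i], q[i+1], q[i+2], t);
+ *
+ * with t running from 0 to 1.)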
q0 is the left + * endpoint of the previous interval, and q3 is the right endpoint of the next + * interval. Use this function to smoothly interpolate between a sequence of + * rotations. */ +static SIMD_NOINLINE simd_quatf simd_spline(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t); + +/*! @abstract Spherical cubic Bezier interpolation between quaternions. + * + * @discussion The function treats q0 ... q3 as control points and uses slerp + * in place of lerp in the De Casteljau algorithm. The endpoints of + * interpolation are thus q0 and q3, and the curve will not generally pass + * through q1 or q2. Note that the convex hull property of "standard" Bezier + * curves does not hold on the sphere. */ +static SIMD_NOINLINE simd_quatf simd_bezier(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t); + +#ifdef __cplusplus +} /* extern "C" */ +/* MARK: - C++ float interfaces */ + +namespace simd { + struct quatf : ::simd_quatf { + /*! @abstract The identity quaternion. */ + quatf( ) : ::simd_quatf(::simd_quaternion((float4){0,0,0,1})) { } + + /*! @abstract Constructs a C++ quaternion from a C quaternion. */ + quatf(::simd_quatf q) : ::simd_quatf(q) { } + + /*! @abstract Constructs a quaternion from components. */ + quatf(float ix, float iy, float iz, float r) : ::simd_quatf(::simd_quaternion(ix, iy, iz, r)) { } + + /*! @abstract Constructs a quaternion from an array of scalars. */ + quatf(const float xyzr[4]) : ::simd_quatf(::simd_quaternion(xyzr)) { } + + /*! @abstract Constructs a quaternion from a vector. */ + quatf(float4 xyzr) : ::simd_quatf(::simd_quaternion(xyzr)) { } + + /*! @abstract Quaternion representing rotation about `axis` by `angle` + * radians. */ + quatf(float angle, float3 axis) : ::simd_quatf(::simd_quaternion(angle, axis)) { } + + /*! @abstract Quaternion that rotates `from` into `to`. */ + quatf(float3 from, float3 to) : ::simd_quatf(::simd_quaternion(from, to)) { } + + /*! @abstract Constructs a quaternion from a rotation matrix. */ + quatf(::simd_float3x3 matrix) : ::simd_quatf(::simd_quaternion(matrix)) { } + + /*! @abstract Constructs a quaternion from a rotation matrix. */ + quatf(::simd_float4x4 matrix) : ::simd_quatf(::simd_quaternion(matrix)) { } + + /*! @abstract The real (scalar) part of the quaternion. */ + float real(void) const { return ::simd_real(*this); } + + /*! @abstract The imaginary (vector) part of the quaternion. */ + float3 imag(void) const { return ::simd_imag(*this); } + + /*! @abstract The angle the quaternion rotates by. */ + float angle(void) const { return ::simd_angle(*this); } + + /*! @abstract The axis the quaternion rotates about. */ + float3 axis(void) const { return ::simd_axis(*this); } + + /*! @abstract The length of the quaternion. */ + float length(void) const { return ::simd_length(*this); } + + /*! @abstract Act on the vector `v` by rotation. 
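+ * For example (an illustrative sketch): a quarter turn about the z-axis
+ * takes the x-axis to the y-axis,
+ *
+ *     simd::quatf q(1.57079632679f, simd::float3{0, 0, 1});
+ *     simd::float3 y = q(simd::float3{1, 0, 0});  // approximately {0, 1, 0}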
*/ + float3 operator()(const ::simd_float3 v) const { return ::simd_act(*this, v); } + }; + + static SIMD_CPPFUNC quatf operator+(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_add(p, q); } + static SIMD_CPPFUNC quatf operator-(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_sub(p, q); } + static SIMD_CPPFUNC quatf operator-(const ::simd_quatf p) { return ::simd_negate(p); } + static SIMD_CPPFUNC quatf operator*(const float r, const ::simd_quatf p) { return ::simd_mul(r, p); } + static SIMD_CPPFUNC quatf operator*(const ::simd_quatf p, const float r) { return ::simd_mul(p, r); } + static SIMD_CPPFUNC quatf operator*(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_mul(p, q); } + static SIMD_CPPFUNC quatf operator/(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_mul(p, ::simd_inverse(q)); } + static SIMD_INLINE SIMD_NODEBUG quatf operator+=(quatf &p, const ::simd_quatf q) { return p = p+q; } + static SIMD_INLINE SIMD_NODEBUG quatf operator-=(quatf &p, const ::simd_quatf q) { return p = p-q; } + static SIMD_INLINE SIMD_NODEBUG quatf operator*=(quatf &p, const float r) { return p = p*r; } + static SIMD_INLINE SIMD_NODEBUG quatf operator*=(quatf &p, const ::simd_quatf q) { return p = p*q; } + static SIMD_INLINE SIMD_NODEBUG quatf operator/=(quatf &p, const ::simd_quatf q) { return p = p/q; } + + /*! @abstract The conjugate of the quaternion `q`. */ + static SIMD_CPPFUNC quatf conjugate(const ::simd_quatf p) { return ::simd_conjugate(p); } + + /*! @abstract The (multiplicative) inverse of the quaternion `q`. */ + static SIMD_CPPFUNC quatf inverse(const ::simd_quatf p) { return ::simd_inverse(p); } + + /*! @abstract The dot product of the quaternions `p` and `q` interpreted as + * four-dimensional vectors. */ + static SIMD_CPPFUNC float dot(const ::simd_quatf p, const ::simd_quatf q) { return ::simd_dot(p, q); } + + /*! @abstract The unit quaternion obtained by normalizing `q`. */ + static SIMD_CPPFUNC quatf normalize(const ::simd_quatf p) { return ::simd_normalize(p); } + + /*! @abstract logarithm of the quaternion `q`. */ + static SIMD_CPPFUNC quatf log(const ::simd_quatf q) { return ::__tg_log(q); } + + /*! @abstract exponential map of quaternion `q`. */ + static SIMD_CPPFUNC quatf exp(const ::simd_quatf q) { return ::__tg_exp(q); } + + /*! @abstract Spherical linear interpolation along the shortest arc between + * quaternions `q0` and `q1`. */ + static SIMD_CPPFUNC quatf slerp(const ::simd_quatf p0, const ::simd_quatf p1, float t) { return ::simd_slerp(p0, p1, t); } + + /*! @abstract Spherical linear interpolation along the longest arc between + * quaternions `q0` and `q1`. */ + static SIMD_CPPFUNC quatf slerp_longest(const ::simd_quatf p0, const ::simd_quatf p1, float t) { return ::simd_slerp_longest(p0, p1, t); } + + /*! @abstract Interpolate between quaternions along a spherical cubic spline. + * + * @discussion The function interpolates between q1 and q2. q0 is the left + * endpoint of the previous interval, and q3 is the right endpoint of the next + * interval. Use this function to smoothly interpolate between a sequence of + * rotations. */ + static SIMD_CPPFUNC quatf spline(const ::simd_quatf p0, const ::simd_quatf p1, const ::simd_quatf p2, const ::simd_quatf p3, float t) { return ::simd_spline(p0, p1, p2, p3, t); } + + /*! @abstract Spherical cubic Bezier interpolation between quaternions. + * + * @discussion The function treats q0 ... q3 as control points and uses slerp + * in place of lerp in the De Casteljau algorithm. 
The endpoints of + * interpolation are thus q0 and q3, and the curve will not generally pass + * through q1 or q2. Note that the convex hull property of "standard" Bezier + * curves does not hold on the sphere. */ + static SIMD_CPPFUNC quatf bezier(const ::simd_quatf p0, const ::simd_quatf p1, const ::simd_quatf p2, const ::simd_quatf p3, float t) { return ::simd_bezier(p0, p1, p2, p3, t); } +} + +extern "C" { +#endif /* __cplusplus */ + +/* MARK: - float implementations */ + +#include <simd/math.h> +#include <simd/geometry.h> + +/* tg_promote is implementation gobbledygook that enables the compile-time + * dispatching in tgmath.h to work its magic. */ +static simd_quatf __attribute__((__overloadable__)) __tg_promote(simd_quatf); + +/*! @abstract Constructs a quaternion from imaginary and real parts. + * @discussion This function is hidden behind an underscore to avoid confusion + * with the angle-axis constructor. */ +static inline SIMD_CFUNC simd_quatf _simd_quaternion(simd_float3 imag, float real) { + return simd_quaternion(simd_make_float4(imag, real)); +} + +static inline SIMD_CFUNC simd_quatf simd_quaternion(float angle, simd_float3 axis) { + return _simd_quaternion(sin(angle/2) * axis, cos(angle/2)); +} + +static inline SIMD_CFUNC float simd_angle(simd_quatf q) { + return 2*atan2(simd_length(q.vector.xyz), q.vector.w); +} + +static inline SIMD_CFUNC simd_float3 simd_axis(simd_quatf q) { + return simd_normalize(q.vector.xyz); +} + +static inline SIMD_CFUNC simd_quatf simd_add(simd_quatf p, simd_quatf q) { + return simd_quaternion(p.vector + q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_sub(simd_quatf p, simd_quatf q) { + return simd_quaternion(p.vector - q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf p, simd_quatf q) { + #pragma STDC FP_CONTRACT ON + return simd_quaternion((p.vector.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) + + p.vector.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5)) + + (p.vector.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6) + + p.vector.w * q.vector)); +} + +static inline SIMD_CFUNC simd_quatf simd_mul(simd_quatf q, float a) { + return simd_quaternion(a * q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_mul(float a, simd_quatf q) { + return simd_mul(q,a); +} + +static inline SIMD_CFUNC simd_quatf simd_conjugate(simd_quatf q) { + return simd_quaternion(q.vector * (simd_float4){-1,-1,-1, 1}); +} + +static inline SIMD_CFUNC simd_quatf simd_inverse(simd_quatf q) { + return simd_quaternion(simd_conjugate(q).vector * simd_recip(simd_length_squared(q.vector))); +} + +static inline SIMD_CFUNC simd_quatf simd_negate(simd_quatf q) { + return simd_quaternion(-q.vector); +} + +static inline SIMD_CFUNC float simd_dot(simd_quatf p, simd_quatf q) { + return simd_dot(p.vector, q.vector); +} + +static inline SIMD_CFUNC float simd_length(simd_quatf q) { + return simd_length(q.vector); +} + +static inline SIMD_CFUNC simd_quatf simd_normalize(simd_quatf q) { + float length_squared = simd_length_squared(q.vector); + if (length_squared == 0) { + return simd_quaternion((simd_float4){0,0,0,1}); + } + return simd_quaternion(q.vector * simd_rsqrt(length_squared)); +} + +#if defined __arm__ || defined __arm64__ +/*! @abstract Multiplies the vector `v` by the quaternion `q`. + * + * @discussion This IS NOT the action of `q` on `v` (i.e. this is not rotation + * by `q`); that operation is provided by `simd_act(q, v)`. This function is an + * implementation detail and you should not call it directly. 
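+ * (It exists so that, on arm targets, simd_act(q, v) below can be
+ * evaluated as q * (v * conjugate(q)) using two quaternion products.)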
It may be + * removed or modified in future versions of the simd module. */ +static inline SIMD_CFUNC simd_quatf _simd_mul_vq(simd_float3 v, simd_quatf q) { + #pragma STDC FP_CONTRACT ON + return simd_quaternion(v.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) + + v.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5) + + v.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6)); +} +#endif + +static inline SIMD_CFUNC simd_float3 simd_act(simd_quatf q, simd_float3 v) { +#if defined __arm__ || defined __arm64__ + return simd_mul(q, _simd_mul_vq(v, simd_conjugate(q))).vector.xyz; +#else + #pragma STDC FP_CONTRACT ON + simd_float3 t = 2*simd_cross(simd_imag(q),v); + return v + simd_real(q)*t + simd_cross(simd_imag(q), t); +#endif +} + +static SIMD_NOINLINE simd_quatf __tg_log(simd_quatf q) { + float real = __tg_log(simd_length_squared(q.vector))/2; + if (simd_equal(simd_imag(q), 0)) return _simd_quaternion(0, real); + simd_float3 imag = __tg_acos(simd_real(q)/simd_length(q)) * simd_normalize(simd_imag(q)); + return _simd_quaternion(imag, real); +} + +static SIMD_NOINLINE simd_quatf __tg_exp(simd_quatf q) { + // angle is actually *twice* the angle of the rotation corresponding to + // the resulting quaternion, which is why we don't simply use the (angle, + // axis) constructor to generate `unit`. + float angle = simd_length(simd_imag(q)); + if (angle == 0) return _simd_quaternion(0, exp(simd_real(q))); + simd_float3 axis = simd_normalize(simd_imag(q)); + simd_quatf unit = _simd_quaternion(sin(angle)*axis, cosf(angle)); + return simd_mul(exp(simd_real(q)), unit); +} + +/*! @abstract Implementation detail of the `simd_quaternion(from, to)` + * initializer. + * + * @discussion Computes the quaternion rotation `from` to `to` if they are + * separated by less than 90 degrees. Not numerically stable for larger + * angles. This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static inline SIMD_CFUNC simd_quatf _simd_quaternion_reduced(simd_float3 from, simd_float3 to) { + simd_float3 half = simd_normalize(from + to); + return _simd_quaternion(simd_cross(from, half), simd_dot(from, half)); +} + +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3 from, simd_float3 to) { + + // If the angle between from and to is not too big, we can compute the + // rotation accurately using a simple implementation. + if (simd_dot(from, to) >= 0) { + return _simd_quaternion_reduced(from, to); + } + + // Because from and to are more than 90 degrees apart, we compute the + // rotation in two stages (from -> half), (half -> to) to preserve numerical + // accuracy. + simd_float3 half = simd_normalize(from) + simd_normalize(to); + + if (simd_length_squared(half) <= 0x1p-46f) { + // half is nearly zero, so from and to point in nearly opposite directions + // and the rotation is numerically underspecified. Pick an axis orthogonal + // to the vectors, and use an angle of pi radians. + simd_float3 abs_from = simd_abs(from); + if (abs_from.x <= abs_from.y && abs_from.x <= abs_from.z) + return _simd_quaternion(simd_normalize(simd_cross(from, (simd_float3){1,0,0})), 0.f); + else if (abs_from.y <= abs_from.z) + return _simd_quaternion(simd_normalize(simd_cross(from, (simd_float3){0,1,0})), 0.f); + else + return _simd_quaternion(simd_normalize(simd_cross(from, (simd_float3){0,0,1})), 0.f); + } + + // Compute the two-step rotation. 
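+  // (Each factor now spans at most a quarter turn, so the reduced
+  // constructor is accurate for both halves.)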
+ half = simd_normalize(half); + return simd_mul(_simd_quaternion_reduced(from, half), + _simd_quaternion_reduced(half, to)); +} + +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float3x3 matrix) { + const simd_float3 *mat = matrix.columns; + float trace = mat[0][0] + mat[1][1] + mat[2][2]; + if (trace >= 0.0) { + float r = 2*sqrt(1 + trace); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[1][2] - mat[2][1]), + rinv*(mat[2][0] - mat[0][2]), + rinv*(mat[0][1] - mat[1][0]), + r/4); + } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]); + float rinv = simd_recip(r); + return simd_quaternion(r/4, + rinv*(mat[0][1] + mat[1][0]), + rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] - mat[2][1])); + } else if (mat[1][1] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][1] + mat[1][0]), + r/4, + rinv*(mat[1][2] + mat[2][1]), + rinv*(mat[2][0] - mat[0][2])); + } else { + float r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] + mat[2][1]), + r/4, + rinv*(mat[0][1] - mat[1][0])); + } +} + +static SIMD_NOINLINE simd_quatf simd_quaternion(simd_float4x4 matrix) { + const simd_float4 *mat = matrix.columns; + float trace = mat[0][0] + mat[1][1] + mat[2][2]; + if (trace >= 0.0) { + float r = 2*sqrt(1 + trace); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[1][2] - mat[2][1]), + rinv*(mat[2][0] - mat[0][2]), + rinv*(mat[0][1] - mat[1][0]), + r/4); + } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]); + float rinv = simd_recip(r); + return simd_quaternion(r/4, + rinv*(mat[0][1] + mat[1][0]), + rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] - mat[2][1])); + } else if (mat[1][1] >= mat[2][2]) { + float r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][1] + mat[1][0]), + r/4, + rinv*(mat[1][2] + mat[2][1]), + rinv*(mat[2][0] - mat[0][2])); + } else { + float r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]); + float rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] + mat[2][1]), + r/4, + rinv*(mat[0][1] - mat[1][0])); + } +} + +/*! @abstract The angle between p and q interpreted as 4-dimensional vectors. + * + * @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE float _simd_angle(simd_quatf p, simd_quatf q) { + return 2*atan2(simd_length(p.vector - q.vector), simd_length(p.vector + q.vector)); +} + +/*! @abstract sin(x)/x. + * + * @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_CFUNC float _simd_sinc(float x) { + if (x == 0) return 1; + return sin(x)/x; +} + +/*! @abstract Spherical lerp between q0 and q1. + * + * @discussion This function may interpolate along either the longer or + * shorter path between q0 and q1; it is used as an implementation detail + * in `simd_slerp` and `simd_slerp_longest`; you should use those functions + * instead of calling this directly. 
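+ *
+ * The computation below follows the standard formula
+ *
+ *     slerp(q0, q1, t) = sin((1-t)a)/sin(a) q0 + sin(t a)/sin(a) q1,
+ *
+ * where a is the angle between q0 and q1, rewritten in terms of
+ * sinc(x) = sin(x)/x so that it remains well-defined as a -> 0, using
+ * sin(s a)/sin(a) = s sinc(s a)/sinc(a).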
*/ +static SIMD_NOINLINE simd_quatf _simd_slerp_internal(simd_quatf q0, simd_quatf q1, float t) { + float s = 1 - t; + float a = _simd_angle(q0, q1); + float r = simd_recip(_simd_sinc(a)); + return simd_normalize(simd_quaternion(_simd_sinc(s*a)*r*s*q0.vector + _simd_sinc(t*a)*r*t*q1.vector)); +} + +static SIMD_NOINLINE simd_quatf simd_slerp(simd_quatf q0, simd_quatf q1, float t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, q1, t); + return _simd_slerp_internal(q0, simd_negate(q1), t); +} + +static SIMD_NOINLINE simd_quatf simd_slerp_longest(simd_quatf q0, simd_quatf q1, float t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, simd_negate(q1), t); + return _simd_slerp_internal(q0, q1, t); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatf _simd_intermediate(simd_quatf q0, simd_quatf q1, simd_quatf q2) { + simd_quatf p0 = __tg_log(simd_mul(q0, simd_inverse(q1))); + simd_quatf p2 = __tg_log(simd_mul(q2, simd_inverse(q1))); + return simd_normalize(simd_mul(q1, __tg_exp(simd_mul(-0.25, simd_add(p0,p2))))); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatf _simd_squad(simd_quatf q0, simd_quatf qa, simd_quatf qb, simd_quatf q1, float t) { + simd_quatf r0 = _simd_slerp_internal(q0, q1, t); + simd_quatf r1 = _simd_slerp_internal(qa, qb, t); + return _simd_slerp_internal(r0, r1, 2*t*(1 - t)); +} + +static SIMD_NOINLINE simd_quatf simd_spline(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t) { + simd_quatf qa = _simd_intermediate(q0, q1, q2); + simd_quatf qb = _simd_intermediate(q1, q2, q3); + return _simd_squad(q1, qa, qb, q2, t); +} + +static SIMD_NOINLINE simd_quatf simd_bezier(simd_quatf q0, simd_quatf q1, simd_quatf q2, simd_quatf q3, float t) { + simd_quatf q01 = _simd_slerp_internal(q0, q1, t); + simd_quatf q12 = _simd_slerp_internal(q1, q2, t); + simd_quatf q23 = _simd_slerp_internal(q2, q3, t); + simd_quatf q012 = _simd_slerp_internal(q01, q12, t); + simd_quatf q123 = _simd_slerp_internal(q12, q23, t); + return _simd_slerp_internal(q012, q123, t); +} + +/* MARK: - C and Objective-C double interfaces */ + +/*! @abstract Constructs a quaternion from four scalar values. + * + * @param ix The first component of the imaginary (vector) part. + * @param iy The second component of the imaginary (vector) part. + * @param iz The third component of the imaginary (vector) part. + * + * @param r The real (scalar) part. */ +static inline SIMD_CFUNC simd_quatd simd_quaternion(double ix, double iy, double iz, double r) { + return (simd_quatd){ { ix, iy, iz, r } }; +} + +/*! @abstract Constructs a quaternion from an array of four scalars. + * + * @discussion Note that the imaginary part of the quaternion comes from + * array elements 0, 1, and 2, and the real part comes from element 3. */ +static inline SIMD_NONCONST simd_quatd simd_quaternion(const double xyzr[4]) { + return (simd_quatd){ *(const simd_packed_double4 *)xyzr }; +} + +/*! @abstract Constructs a quaternion from a four-element vector. + * + * @discussion Note that the imaginary (vector) part of the quaternion comes + * from lanes 0, 1, and 2 of the vector, and the real (scalar) part comes from + * lane 3. 
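`simd_slerp` above negates `q1` whenever `simd_dot(q0, q1) < 0`; since `q` and `-q` encode the same rotation, the flip costs nothing and keeps the interpolation on the shorter of the two great-circle arcs. A small usage sketch (hypothetical driver code, assuming the headers import as `<simd/simd.h>`):

~~~
#include <simd/simd.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  simd_float3 axis = { 0, 1, 0 };
  simd_quatf q0 = simd_quaternion(0.0f, axis);           // identity
  simd_quatf q1 = simd_quaternion((float)M_PI_2, axis);  // 90 degrees
  for (float t = 0; t <= 1.0f; t += 0.25f) {
    // Expect the angle to grow linearly: 0, pi/8, pi/4, 3pi/8, pi/2.
    printf("t=%.2f angle=%f\n", t, simd_angle(simd_slerp(q0, q1, t)));
  }
  return 0;
}
~~~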
*/ +static inline SIMD_CFUNC simd_quatd simd_quaternion(simd_double4 xyzr) { + return (simd_quatd){ xyzr }; +} + +/*! @abstract Constructs a quaternion that rotates by `angle` radians about + * `axis`. */ +static inline SIMD_CFUNC simd_quatd simd_quaternion(double angle, simd_double3 axis); + +/*! @abstract Construct a quaternion that rotates from one vector to another. + * + * @param from A normalized three-element vector. + * @param to A normalized three-element vector. + * + * @discussion The rotation axis is `simd_cross(from, to)`. If `from` and + * `to` point in opposite directions (to within machine precision), an + * arbitrary rotation axis is chosen, and the angle is pi radians. */ +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3 from, simd_double3 to); + +/*! @abstract Construct a quaternion from a 3x3 rotation `matrix`. + * + * @discussion If `matrix` is not orthogonal with determinant 1, the result + * is undefined. */ +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3x3 matrix); + +/*! @abstract Construct a quaternion from a 4x4 rotation `matrix`. + * + * @discussion The last row and column of the matrix are ignored. This + * function is equivalent to calling simd_quaternion with the upper-left 3x3 + * submatrix . */ +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double4x4 matrix); + +/*! @abstract The real (scalar) part of the quaternion `q`. */ +static inline SIMD_CFUNC double simd_real(simd_quatd q) { + return q.vector.w; +} + +/*! @abstract The imaginary (vector) part of the quaternion `q`. */ +static inline SIMD_CFUNC simd_double3 simd_imag(simd_quatd q) { + return q.vector.xyz; +} + +/*! @abstract The angle (in radians) of rotation represented by `q`. */ +static inline SIMD_CFUNC double simd_angle(simd_quatd q); + +/*! @abstract The normalized axis (a 3-element vector) around which the + * action of the quaternion `q` rotates. */ +static inline SIMD_CFUNC simd_double3 simd_axis(simd_quatd q); + +/*! @abstract The sum of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatd simd_add(simd_quatd p, simd_quatd q); + +/*! @abstract The difference of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatd simd_sub(simd_quatd p, simd_quatd q); + +/*! @abstract The product of the quaternions `p` and `q`. */ +static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd p, simd_quatd q); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd q, double a); + +/*! @abstract The quaternion `q` scaled by the real value `a`. */ +static inline SIMD_CFUNC simd_quatd simd_mul(double a, simd_quatd q); + +/*! @abstract The conjugate of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatd simd_conjugate(simd_quatd q); + +/*! @abstract The (multiplicative) inverse of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatd simd_inverse(simd_quatd q); + +/*! @abstract The negation (additive inverse) of the quaternion `q`. */ +static inline SIMD_CFUNC simd_quatd simd_negate(simd_quatd q); + +/*! @abstract The dot product of the quaternions `p` and `q` interpreted as + * four-dimensional vectors. */ +static inline SIMD_CFUNC double simd_dot(simd_quatd p, simd_quatd q); + +/*! @abstract The length of the quaternion `q`. */ +static inline SIMD_CFUNC double simd_length(simd_quatd q); + +/*! @abstract The unit quaternion obtained by normalizing `q`. */ +static inline SIMD_CFUNC simd_quatd simd_normalize(simd_quatd q); + +/*! 
@abstract Rotates the vector `v` by the quaternion `q`. */
+static inline SIMD_CFUNC simd_double3 simd_act(simd_quatd q, simd_double3 v);
+
+/*! @abstract Logarithm of the quaternion `q`.
+ * @discussion Do not call this function directly; use `log(q)` instead.
+ *
+ * We can write a quaternion `q` in the form: `r(cos(t) + sin(t)v)` where
+ * `r` is the length of `q`, `t` is an angle, and `v` is a unit 3-vector.
+ * The logarithm of `q` is `log(r) + tv`, just like the logarithm of the
+ * complex number `r*(cos(t) + i sin(t))` is `log(r) + it`.
+ *
+ * Note that this function is not robust against poorly-scaled non-unit
+ * quaternions, because it is primarily used for spline interpolation of
+ * unit quaternions. If you need to compute a robust logarithm of general
+ * quaternions, you can use the following approach:
+ *
+ *   scale = simd_reduce_max(simd_abs(q.vector));
+ *   logq = log(simd_recip(scale)*q);
+ *   logq.real += log(scale);
+ *   return logq; */
+static SIMD_NOINLINE simd_quatd __tg_log(simd_quatd q);
+
+/*! @abstract Inverse of `log( )`; the exponential map on quaternions.
+ * @discussion Do not call this function directly; use `exp(q)` instead. */
+static SIMD_NOINLINE simd_quatd __tg_exp(simd_quatd q);
+
+/*! @abstract Spherical linear interpolation along the shortest arc between
+ * quaternions `q0` and `q1`. */
+static SIMD_NOINLINE simd_quatd simd_slerp(simd_quatd q0, simd_quatd q1, double t);
+
+/*! @abstract Spherical linear interpolation along the longest arc between
+ * quaternions `q0` and `q1`. */
+static SIMD_NOINLINE simd_quatd simd_slerp_longest(simd_quatd q0, simd_quatd q1, double t);
+
+/*! @abstract Interpolate between quaternions along a spherical cubic spline.
+ *
+ * @discussion The function interpolates between q1 and q2. q0 is the left
+ * endpoint of the previous interval, and q3 is the right endpoint of the next
+ * interval. Use this function to smoothly interpolate between a sequence of
+ * rotations. */
+static SIMD_NOINLINE simd_quatd simd_spline(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t);
+
+/*! @abstract Spherical cubic Bezier interpolation between quaternions.
+ *
+ * @discussion The function treats q0 ... q3 as control points and uses slerp
+ * in place of lerp in the De Casteljau algorithm. The endpoints of
+ * interpolation are thus q0 and q3, and the curve will not generally pass
+ * through q1 or q2. Note that the convex hull property of "standard" Bezier
+ * curves does not hold on the sphere. */
+static SIMD_NOINLINE simd_quatd simd_bezier(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t);
+
+#ifdef __cplusplus
+} /* extern "C" */
+/* MARK: - C++ double interfaces */
+
+namespace simd {
+  struct quatd : ::simd_quatd {
+    /*! @abstract The identity quaternion. */
+    quatd( ) : ::simd_quatd(::simd_quaternion((double4){0,0,0,1})) { }
+
+    /*! @abstract Constructs a C++ quaternion from a C quaternion. */
+    quatd(::simd_quatd q) : ::simd_quatd(q) { }
+
+    /*! @abstract Constructs a quaternion from components. */
+    quatd(double ix, double iy, double iz, double r) : ::simd_quatd(::simd_quaternion(ix, iy, iz, r)) { }
+
+    /*! @abstract Constructs a quaternion from an array of scalars. */
+    quatd(const double xyzr[4]) : ::simd_quatd(::simd_quaternion(xyzr)) { }
+
+    /*! @abstract Constructs a quaternion from a vector. */
+    quatd(double4 xyzr) : ::simd_quatd(::simd_quaternion(xyzr)) { }
+
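The C constructors documented above are clang-overloadable functions, so one name covers scalar, array, vector, and angle-axis input. A sketch showing three equivalent spellings of the same rotation (assumes clang and `<simd/simd.h>`; the expected-output comments are mine, not the header's):

~~~
#include <simd/simd.h>
#include <math.h>
#include <stdio.h>

int main(void) {
  // 90 degrees about +z, spelled three ways.
  simd_quatd a = simd_quaternion(M_PI_2, (simd_double3){0, 0, 1});
  simd_quatd b = simd_quaternion(0.0, 0.0, sin(M_PI_4), cos(M_PI_4));
  double xyzr[4] = { 0.0, 0.0, sin(M_PI_4), cos(M_PI_4) };
  simd_quatd c = simd_quaternion(xyzr);

  // Each rotates +x to (approximately) +y.
  simd_double3 v = simd_act(a, (simd_double3){1, 0, 0});
  printf("%f %f %f\n", v.x, v.y, v.z);                               // ~0 1 0
  printf("dot(a,b)=%f dot(a,c)=%f\n", simd_dot(a, b), simd_dot(a, c)); // ~1 ~1
  return 0;
}
~~~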
+    /*! @abstract Quaternion representing rotation about `axis` by `angle`
+     * radians. */
+    quatd(double angle, double3 axis) : ::simd_quatd(::simd_quaternion(angle, axis)) { }
+
+    /*! @abstract Quaternion that rotates `from` into `to`. */
+    quatd(double3 from, double3 to) : ::simd_quatd(::simd_quaternion(from, to)) { }
+
+    /*! @abstract Constructs a quaternion from a rotation matrix. */
+    quatd(::simd_double3x3 matrix) : ::simd_quatd(::simd_quaternion(matrix)) { }
+
+    /*! @abstract Constructs a quaternion from a rotation matrix. */
+    quatd(::simd_double4x4 matrix) : ::simd_quatd(::simd_quaternion(matrix)) { }
+
+    /*! @abstract The real (scalar) part of the quaternion. */
+    double real(void) const { return ::simd_real(*this); }
+
+    /*! @abstract The imaginary (vector) part of the quaternion. */
+    double3 imag(void) const { return ::simd_imag(*this); }
+
+    /*! @abstract The angle the quaternion rotates by. */
+    double angle(void) const { return ::simd_angle(*this); }
+
+    /*! @abstract The axis the quaternion rotates about. */
+    double3 axis(void) const { return ::simd_axis(*this); }
+
+    /*! @abstract The length of the quaternion. */
+    double length(void) const { return ::simd_length(*this); }
+
+    /*! @abstract Act on the vector `v` by rotation. */
+    double3 operator()(const ::simd_double3 v) const { return ::simd_act(*this, v); }
+  };
+
+  static SIMD_CPPFUNC quatd operator+(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_add(p, q); }
+  static SIMD_CPPFUNC quatd operator-(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_sub(p, q); }
+  static SIMD_CPPFUNC quatd operator-(const ::simd_quatd p) { return ::simd_negate(p); }
+  static SIMD_CPPFUNC quatd operator*(const double r, const ::simd_quatd p) { return ::simd_mul(r, p); }
+  static SIMD_CPPFUNC quatd operator*(const ::simd_quatd p, const double r) { return ::simd_mul(p, r); }
+  static SIMD_CPPFUNC quatd operator*(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_mul(p, q); }
+  static SIMD_CPPFUNC quatd operator/(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_mul(p, ::simd_inverse(q)); }
+  static SIMD_INLINE SIMD_NODEBUG quatd operator+=(quatd &p, const ::simd_quatd q) { return p = p+q; }
+  static SIMD_INLINE SIMD_NODEBUG quatd operator-=(quatd &p, const ::simd_quatd q) { return p = p-q; }
+  static SIMD_INLINE SIMD_NODEBUG quatd operator*=(quatd &p, const double r) { return p = p*r; }
+  static SIMD_INLINE SIMD_NODEBUG quatd operator*=(quatd &p, const ::simd_quatd q) { return p = p*q; }
+  static SIMD_INLINE SIMD_NODEBUG quatd operator/=(quatd &p, const ::simd_quatd q) { return p = p/q; }
+
+  /*! @abstract The conjugate of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd conjugate(const ::simd_quatd p) { return ::simd_conjugate(p); }
+
+  /*! @abstract The (multiplicative) inverse of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd inverse(const ::simd_quatd p) { return ::simd_inverse(p); }
+
+  /*! @abstract The dot product of the quaternions `p` and `q` interpreted as
+   * four-dimensional vectors. */
+  static SIMD_CPPFUNC double dot(const ::simd_quatd p, const ::simd_quatd q) { return ::simd_dot(p, q); }
+
+  /*! @abstract The unit quaternion obtained by normalizing `q`. */
+  static SIMD_CPPFUNC quatd normalize(const ::simd_quatd p) { return ::simd_normalize(p); }
+
+  /*! @abstract The logarithm of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd log(const ::simd_quatd q) { return ::__tg_log(q); }
+
+  /*! @abstract The exponential map of the quaternion `q`. */
+  static SIMD_CPPFUNC quatd exp(const ::simd_quatd q) { return ::__tg_exp(q); }
+
+  /*! @abstract Spherical linear interpolation along the shortest arc between
+   * quaternions `q0` and `q1`. */
+  static SIMD_CPPFUNC quatd slerp(const ::simd_quatd p0, const ::simd_quatd p1, double t) { return ::simd_slerp(p0, p1, t); }
+
+  /*! @abstract Spherical linear interpolation along the longest arc between
+   * quaternions `q0` and `q1`. */
+  static SIMD_CPPFUNC quatd slerp_longest(const ::simd_quatd p0, const ::simd_quatd p1, double t) { return ::simd_slerp_longest(p0, p1, t); }
+
+  /*! @abstract Interpolate between quaternions along a spherical cubic spline.
+   *
+   * @discussion The function interpolates between q1 and q2. q0 is the left
+   * endpoint of the previous interval, and q3 is the right endpoint of the next
+   * interval. Use this function to smoothly interpolate between a sequence of
+   * rotations. */
+  static SIMD_CPPFUNC quatd spline(const ::simd_quatd p0, const ::simd_quatd p1, const ::simd_quatd p2, const ::simd_quatd p3, double t) { return ::simd_spline(p0, p1, p2, p3, t); }
+
+  /*! @abstract Spherical cubic Bezier interpolation between quaternions.
+   *
+   * @discussion The function treats q0 ... q3 as control points and uses slerp
+   * in place of lerp in the De Casteljau algorithm. The endpoints of
+   * interpolation are thus q0 and q3, and the curve will not generally pass
+   * through q1 or q2. Note that the convex hull property of "standard" Bezier
+   * curves does not hold on the sphere. */
+  static SIMD_CPPFUNC quatd bezier(const ::simd_quatd p0, const ::simd_quatd p1, const ::simd_quatd p2, const ::simd_quatd p3, double t) { return ::simd_bezier(p0, p1, p2, p3, t); }
+}
+
+extern "C" {
+#endif /* __cplusplus */
+
+/* MARK: - double implementations */
+
+#include
+#include
+
+/* tg_promote is implementation gobbledygook that enables the compile-time
+ * dispatching in tgmath.h to work its magic. */
+static simd_quatd __attribute__((__overloadable__)) __tg_promote(simd_quatd);
+
+/*! @abstract Constructs a quaternion from imaginary and real parts.
+ * @discussion This function is hidden behind an underscore to avoid confusion
+ * with the angle-axis constructor. */
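The `__tg_log` discussion earlier gives a rescaling recipe for a robust logarithm of arbitrarily scaled quaternions. Transcribed into plain C it might look like the hypothetical helper below; it calls the internal `__tg_log` name only because plain C lacks the `log(q)` spelling, so treat this as an illustration rather than supported API:

~~~
#include <simd/simd.h>
#include <stdio.h>

// Hypothetical helper following the recipe from the __tg_log discussion:
// factor out the largest |component| so the squared length can neither
// overflow nor underflow, then add log(scale) back onto the real part.
static simd_quatd robust_log(simd_quatd q) {
  double scale = simd_reduce_max(simd_abs(q.vector));
  simd_quatd logq = __tg_log(simd_mul(simd_recip(scale), q));
  logq.vector.w += __tg_log(scale);  // the real part lives in lane 3
  return logq;
}

int main(void) {
  // For this tiny quaternion, length_squared underflows to 0, so a naive
  // log would report -inf; the rescaled version gives -600*ln(2).
  simd_quatd tiny = simd_mul(0x1p-600, simd_quaternion(0.0, 0.0, 0.0, 1.0));
  printf("real = %f\n", simd_real(robust_log(tiny)));  // ~ -415.888
  return 0;
}
~~~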
+static inline SIMD_CFUNC simd_quatd _simd_quaternion(simd_double3 imag, double real) {
+  return simd_quaternion(simd_make_double4(imag, real));
+}
+
+static inline SIMD_CFUNC simd_quatd simd_quaternion(double angle, simd_double3 axis) {
+  return _simd_quaternion(sin(angle/2) * axis, cos(angle/2));
+}
+
+static inline SIMD_CFUNC double simd_angle(simd_quatd q) {
+  return 2*atan2(simd_length(q.vector.xyz), q.vector.w);
+}
+
+static inline SIMD_CFUNC simd_double3 simd_axis(simd_quatd q) {
+  return simd_normalize(q.vector.xyz);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_add(simd_quatd p, simd_quatd q) {
+  return simd_quaternion(p.vector + q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_sub(simd_quatd p, simd_quatd q) {
+  return simd_quaternion(p.vector - q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd p, simd_quatd q) {
+  #pragma STDC FP_CONTRACT ON
+  return simd_quaternion((p.vector.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) +
+                          p.vector.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5)) +
+                         (p.vector.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6) +
+                          p.vector.w * q.vector));
+}
+
+static inline SIMD_CFUNC simd_quatd simd_mul(simd_quatd q, double a) {
+  return simd_quaternion(a * q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_mul(double a, simd_quatd q) {
+  return simd_mul(q,a);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_conjugate(simd_quatd q) {
+  return simd_quaternion(q.vector * (simd_double4){-1,-1,-1, 1});
+}
+
+static inline SIMD_CFUNC simd_quatd simd_inverse(simd_quatd q) {
+  return simd_quaternion(simd_conjugate(q).vector * simd_recip(simd_length_squared(q.vector)));
+}
+
+static inline SIMD_CFUNC simd_quatd simd_negate(simd_quatd q) {
+  return simd_quaternion(-q.vector);
+}
+
+static inline SIMD_CFUNC double simd_dot(simd_quatd p, simd_quatd q) {
+  return simd_dot(p.vector, q.vector);
+}
+
+static inline SIMD_CFUNC double simd_length(simd_quatd q) {
+  return simd_length(q.vector);
+}
+
+static inline SIMD_CFUNC simd_quatd simd_normalize(simd_quatd q) {
+  double length_squared = simd_length_squared(q.vector);
+  if (length_squared == 0) {
+    return simd_quaternion((simd_double4){0,0,0,1});
+  }
+  return simd_quaternion(q.vector * simd_rsqrt(length_squared));
+}
+
+#if defined __arm__ || defined __arm64__
+/*! @abstract Multiplies the vector `v` by the quaternion `q`.
+ *
+ * @discussion This IS NOT the action of `q` on `v` (i.e. this is not rotation
+ * by `q`). That operation is provided by `simd_act(q, v)`. This function is an
+ * implementation detail and you should not call it directly. It may be
+ * removed or modified in future versions of the simd module. */
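In `simd_mul` above, each `__builtin_shufflevector(q.vector, -q.vector, ...)` selects lanes from the concatenation of `q` and `-q`, so every term arrives with the sign the Hamilton product requires (indices `3,6,1,4`, for example, yield `(w, -z, y, -x)`). A quick cross-check against the textbook formula, assuming `<simd/simd.h>` is available (illustrative test code only):

~~~
#include <simd/simd.h>
#include <stdio.h>

// Textbook Hamilton product: (pw + pv)(qw + qv) =
//   pw*qw - pv.qv  +  pw*qv + qw*pv + pv x qv
static simd_quatd hamilton(simd_quatd p, simd_quatd q) {
  simd_double3 pv = simd_imag(p), qv = simd_imag(q);
  double       pw = simd_real(p), qw = simd_real(q);
  return simd_quaternion(simd_make_double4(
      pw*qv + qw*pv + simd_cross(pv, qv),   // imaginary part
      pw*qw - simd_dot(pv, qv)));           // real part
}

int main(void) {
  simd_quatd p = simd_quaternion(0.3, simd_normalize((simd_double3){1, 2, 3}));
  simd_quatd q = simd_quaternion(1.1, simd_normalize((simd_double3){-2, 0, 1}));
  simd_quatd d = simd_sub(simd_mul(p, q), hamilton(p, q));
  printf("max |diff| = %e\n", simd_reduce_max(simd_abs(d.vector)));  // ~1e-16
  return 0;
}
~~~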
+static inline SIMD_CFUNC simd_quatd _simd_mul_vq(simd_double3 v, simd_quatd q) {
+  #pragma STDC FP_CONTRACT ON
+  return simd_quaternion(v.x * __builtin_shufflevector(q.vector, -q.vector, 3,6,1,4) +
+                         v.y * __builtin_shufflevector(q.vector, -q.vector, 2,3,4,5) +
+                         v.z * __builtin_shufflevector(q.vector, -q.vector, 5,0,3,6));
+}
+#endif
+
+static inline SIMD_CFUNC simd_double3 simd_act(simd_quatd q, simd_double3 v) {
+#if defined __arm__ || defined __arm64__
+  return simd_mul(q, _simd_mul_vq(v, simd_conjugate(q))).vector.xyz;
+#else
+  #pragma STDC FP_CONTRACT ON
+  simd_double3 t = 2*simd_cross(simd_imag(q),v);
+  return v + simd_real(q)*t + simd_cross(simd_imag(q), t);
+#endif
+}
+
+static SIMD_NOINLINE simd_quatd __tg_log(simd_quatd q) {
+  double real = __tg_log(simd_length_squared(q.vector))/2;
+  if (simd_equal(simd_imag(q), 0)) return _simd_quaternion(0, real);
+  simd_double3 imag = __tg_acos(simd_real(q)/simd_length(q)) * simd_normalize(simd_imag(q));
+  return _simd_quaternion(imag, real);
+}
+
+static SIMD_NOINLINE simd_quatd __tg_exp(simd_quatd q) {
+  // angle is actually *twice* the angle of the rotation corresponding to
+  // the resulting quaternion, which is why we don't simply use the (angle,
+  // axis) constructor to generate `unit`.
+  double angle = simd_length(simd_imag(q));
+  if (angle == 0) return _simd_quaternion(0, exp(simd_real(q)));
+  simd_double3 axis = simd_normalize(simd_imag(q));
+  simd_quatd unit = _simd_quaternion(sin(angle)*axis, cos(angle));
+  return simd_mul(exp(simd_real(q)), unit);
+}
+
+/*! @abstract Implementation detail of the `simd_quaternion(from, to)`
+ * initializer.
+ *
+ * @discussion Computes the quaternion rotation `from` to `to` if they are
+ * separated by less than 90 degrees. Not numerically stable for larger
+ * angles. This function is an implementation detail and you should not
+ * call it directly. It may be removed or modified in future versions of the
+ * simd module. */
+static inline SIMD_CFUNC simd_quatd _simd_quaternion_reduced(simd_double3 from, simd_double3 to) {
+  simd_double3 half = simd_normalize(from + to);
+  return _simd_quaternion(simd_cross(from, half), simd_dot(from, half));
+}
+
+static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3 from, simd_double3 to) {
+
+  // If the angle between from and to is not too big, we can compute the
+  // rotation accurately using a simple implementation.
+  if (simd_dot(from, to) >= 0) {
+    return _simd_quaternion_reduced(from, to);
+  }
+
+  // Because from and to are more than 90 degrees apart, we compute the
+  // rotation in two stages (from -> half), (half -> to) to preserve numerical
+  // accuracy.
+  simd_double3 half = simd_normalize(from) + simd_normalize(to);
+
+  if (simd_length_squared(half) <= 0x1p-104) {
+    // half is nearly zero, so from and to point in nearly opposite directions
+    // and the rotation is numerically underspecified. Pick an axis orthogonal
+    // to the vectors, and use an angle of pi radians.
+    simd_double3 abs_from = simd_abs(from);
+    if (abs_from.x <= abs_from.y && abs_from.x <= abs_from.z)
+      return _simd_quaternion(simd_normalize(simd_cross(from, (simd_double3){1,0,0})), 0.f);
+    else if (abs_from.y <= abs_from.z)
+      return _simd_quaternion(simd_normalize(simd_cross(from, (simd_double3){0,1,0})), 0.f);
+    else
+      return _simd_quaternion(simd_normalize(simd_cross(from, (simd_double3){0,0,1})), 0.f);
+  }
+
+  // Compute the two-step rotation.
*/ + half = simd_normalize(half); + return simd_mul(_simd_quaternion_reduced(from, half), + _simd_quaternion_reduced(half, to)); +} + +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double3x3 matrix) { + const simd_double3 *mat = matrix.columns; + double trace = mat[0][0] + mat[1][1] + mat[2][2]; + if (trace >= 0.0) { + double r = 2*sqrt(1 + trace); + double rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[1][2] - mat[2][1]), + rinv*(mat[2][0] - mat[0][2]), + rinv*(mat[0][1] - mat[1][0]), + r/4); + } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) { + double r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]); + double rinv = simd_recip(r); + return simd_quaternion(r/4, + rinv*(mat[0][1] + mat[1][0]), + rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] - mat[2][1])); + } else if (mat[1][1] >= mat[2][2]) { + double r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]); + double rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][1] + mat[1][0]), + r/4, + rinv*(mat[1][2] + mat[2][1]), + rinv*(mat[2][0] - mat[0][2])); + } else { + double r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]); + double rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] + mat[2][1]), + r/4, + rinv*(mat[0][1] - mat[1][0])); + } +} + +static SIMD_NOINLINE simd_quatd simd_quaternion(simd_double4x4 matrix) { + const simd_double4 *mat = matrix.columns; + double trace = mat[0][0] + mat[1][1] + mat[2][2]; + if (trace >= 0.0) { + double r = 2*sqrt(1 + trace); + double rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[1][2] - mat[2][1]), + rinv*(mat[2][0] - mat[0][2]), + rinv*(mat[0][1] - mat[1][0]), + r/4); + } else if (mat[0][0] >= mat[1][1] && mat[0][0] >= mat[2][2]) { + double r = 2*sqrt(1 - mat[1][1] - mat[2][2] + mat[0][0]); + double rinv = simd_recip(r); + return simd_quaternion(r/4, + rinv*(mat[0][1] + mat[1][0]), + rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] - mat[2][1])); + } else if (mat[1][1] >= mat[2][2]) { + double r = 2*sqrt(1 - mat[0][0] - mat[2][2] + mat[1][1]); + double rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][1] + mat[1][0]), + r/4, + rinv*(mat[1][2] + mat[2][1]), + rinv*(mat[2][0] - mat[0][2])); + } else { + double r = 2*sqrt(1 - mat[0][0] - mat[1][1] + mat[2][2]); + double rinv = simd_recip(r); + return simd_quaternion(rinv*(mat[0][2] + mat[2][0]), + rinv*(mat[1][2] + mat[2][1]), + r/4, + rinv*(mat[0][1] - mat[1][0])); + } +} + +/*! @abstract The angle between p and q interpreted as 4-dimensional vectors. + * + * @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE double _simd_angle(simd_quatd p, simd_quatd q) { + return 2*atan2(simd_length(p.vector - q.vector), simd_length(p.vector + q.vector)); +} + +/*! @abstract sin(x)/x. + * + * @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_CFUNC double _simd_sinc(double x) { + if (x == 0) return 1; + return sin(x)/x; +} + +/*! @abstract Spherical lerp between q0 and q1. + * + * @discussion This function may interpolate along either the longer or + * shorter path between q0 and q1; it is used as an implementation detail + * in `simd_slerp` and `simd_slerp_longest`; you should use those functions + * instead of calling this directly. 
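The reason `_simd_slerp_internal` is phrased in terms of `_simd_sinc` rather than raw sines: with sinc(x) = sin(x)/x, the classical slerp weights can be rewritten so they stay finite as the angle `a` goes to zero. In LaTeX form:

~~~
\frac{\sin\bigl((1-t)\,a\bigr)}{\sin a}
  = \frac{(1-t)\,\operatorname{sinc}\bigl((1-t)\,a\bigr)}{\operatorname{sinc} a},
\qquad
\frac{\sin(t\,a)}{\sin a}
  = \frac{t\,\operatorname{sinc}(t\,a)}{\operatorname{sinc} a}.
~~~

Both right-hand sides tend smoothly to `1 - t` and `t` as `a -> 0`, and they are exactly what the implementation computes: `r = 1/sinc(a)` once, then weights `sinc(s*a)*r*s` and `sinc(t*a)*r*t` with `s = 1 - t`.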
*/ +static SIMD_NOINLINE simd_quatd _simd_slerp_internal(simd_quatd q0, simd_quatd q1, double t) { + double s = 1 - t; + double a = _simd_angle(q0, q1); + double r = simd_recip(_simd_sinc(a)); + return simd_normalize(simd_quaternion(_simd_sinc(s*a)*r*s*q0.vector + _simd_sinc(t*a)*r*t*q1.vector)); +} + +static SIMD_NOINLINE simd_quatd simd_slerp(simd_quatd q0, simd_quatd q1, double t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, q1, t); + return _simd_slerp_internal(q0, simd_negate(q1), t); +} + +static SIMD_NOINLINE simd_quatd simd_slerp_longest(simd_quatd q0, simd_quatd q1, double t) { + if (simd_dot(q0, q1) >= 0) + return _simd_slerp_internal(q0, simd_negate(q1), t); + return _simd_slerp_internal(q0, q1, t); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatd _simd_intermediate(simd_quatd q0, simd_quatd q1, simd_quatd q2) { + simd_quatd p0 = __tg_log(simd_mul(q0, simd_inverse(q1))); + simd_quatd p2 = __tg_log(simd_mul(q2, simd_inverse(q1))); + return simd_normalize(simd_mul(q1, __tg_exp(simd_mul(-0.25, simd_add(p0,p2))))); +} + +/*! @discussion This function is an implementation detail and you should not + * call it directly. It may be removed or modified in future versions of the + * simd module. */ +static SIMD_NOINLINE simd_quatd _simd_squad(simd_quatd q0, simd_quatd qa, simd_quatd qb, simd_quatd q1, double t) { + simd_quatd r0 = _simd_slerp_internal(q0, q1, t); + simd_quatd r1 = _simd_slerp_internal(qa, qb, t); + return _simd_slerp_internal(r0, r1, 2*t*(1 - t)); +} + +static SIMD_NOINLINE simd_quatd simd_spline(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t) { + simd_quatd qa = _simd_intermediate(q0, q1, q2); + simd_quatd qb = _simd_intermediate(q1, q2, q3); + return _simd_squad(q1, qa, qb, q2, t); +} + +static SIMD_NOINLINE simd_quatd simd_bezier(simd_quatd q0, simd_quatd q1, simd_quatd q2, simd_quatd q3, double t) { + simd_quatd q01 = _simd_slerp_internal(q0, q1, t); + simd_quatd q12 = _simd_slerp_internal(q1, q2, t); + simd_quatd q23 = _simd_slerp_internal(q2, q3, t); + simd_quatd q012 = _simd_slerp_internal(q01, q12, t); + simd_quatd q123 = _simd_slerp_internal(q12, q23, t); + return _simd_slerp_internal(q012, q123, t); +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif /* __cplusplus */ +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* SIMD_QUATERNIONS */ diff --git a/vfsoverlay/simd.h b/vfsoverlay/simd.h new file mode 100644 index 00000000..fd566bf4 --- /dev/null +++ b/vfsoverlay/simd.h @@ -0,0 +1,30 @@ +/* Copyright (c) 2014 Apple, Inc. All rights reserved. + * + * This header provides small vector (simd) and matrix types, and basic + * arithmetic and mathematical functions for them. The vast majority of these + * operations are implemented as header inlines, as they can be performed + * using just a few instructions on most processors. + * + * These functions are broken into two groups; vector and matrix. This header + * includes all of them, but these may also be included separately. Consult + * these two headers for detailed documentation of what types and operations + * are available. 
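With the double implementations complete, `simd_spline` is the piece most application code touches for keyframe animation: it interpolates the middle pair of keys while the outer keys only shape the tangents. A sketch (float flavor, coaxial keys so the angles are easy to read; assumes `<simd/simd.h>`):

~~~
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
  simd_float3 z = { 0, 0, 1 };
  // The segment interpolated is k1 -> k2; k0 and k3 are the neighboring
  // keys that shape the tangents at the segment's endpoints.
  simd_quatf k0 = simd_quaternion(0.0f, z), k1 = simd_quaternion(0.5f, z);
  simd_quatf k2 = simd_quaternion(1.5f, z), k3 = simd_quaternion(2.0f, z);
  for (float t = 0; t <= 1.0f; t += 0.25f) {
    simd_quatf q = simd_spline(k0, k1, k2, k3, t);
    printf("t=%.2f angle=%f\n", t, simd_angle(q));  // eases from 0.5 to 1.5
  }
  return 0;
}
~~~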
+ */
+
+#ifndef __SIMD_HEADER__
+#define __SIMD_HEADER__
+
+#if __has_include()
+#include
+REALTIME_SAFE_BEGIN
+#endif
+
+#include <simd/vector.h>
+#include <simd/matrix.h>
+#include <simd/quaternion.h>
+
+#if __has_include()
+REALTIME_SAFE_END
+#endif
+
+#endif
diff --git a/vfsoverlay/types.h b/vfsoverlay/types.h
new file mode 100644
index 00000000..e0944670
--- /dev/null
+++ b/vfsoverlay/types.h
@@ -0,0 +1,128 @@
+/*! @header
+ * @copyright 2015-2016 Apple, Inc. All rights reserved.
+ * @unsorted */
+
+#ifndef SIMD_TYPES
+#define SIMD_TYPES
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+
+/*! @group Matrices
+ * @discussion
+ * This header defines nine matrix types for each of float and double, which
+ * are intended for use together with the vector types defined in
+ * <simd/vector_types.h>.
+ *
+ * For compatibility with common graphics libraries, these matrices are stored
+ * in column-major order, and implemented as arrays of column vectors.
+ * Column-major storage order may seem a little strange if you aren't used to
+ * it, but for most usage the memory layout of the matrices shouldn't matter
+ * at all; instead you should think of matrices as abstract mathematical
+ * objects that you use to perform arithmetic without worrying about the
+ * details of the underlying representation.
+ *
+ * WARNING: vectors of length three are internally represented as length four
+ * vectors with one element of padding (for alignment purposes). This means
+ * that when a floatNx3 or doubleNx3 is viewed as a vector, it appears to
+ * have 4*N elements instead of the expected 3*N (with one padding element
+ * at the end of each column). The matrix elements are laid out in memory
+ * as follows:
+ *
+ *   { 0, 1, 2, x, 3, 4, 5, x, ... }
+ *
+ * (where the scalar indices used above indicate the conceptual column-
+ * major storage order). If you aren't monkeying around with the internal
+ * storage details of matrices, you don't need to worry about this at all.
+ * Consider this yet another good reason to avoid doing so. */
+
+/*! @abstract A matrix with 2 rows and 2 columns. */
+typedef struct { simd_float2 columns[2]; } simd_float2x2;
+
+/*! @abstract A matrix with 2 rows and 3 columns. */
+typedef struct { simd_float2 columns[3]; } simd_float3x2;
+
+/*! @abstract A matrix with 2 rows and 4 columns. */
+typedef struct { simd_float2 columns[4]; } simd_float4x2;
+
+/*! @abstract A matrix with 3 rows and 2 columns. */
+typedef struct { simd_float3 columns[2]; } simd_float2x3;
+
+/*! @abstract A matrix with 3 rows and 3 columns. */
+typedef struct { simd_float3 columns[3]; } simd_float3x3;
+
+/*! @abstract A matrix with 3 rows and 4 columns. */
+typedef struct { simd_float3 columns[4]; } simd_float4x3;
+
+/*! @abstract A matrix with 4 rows and 2 columns. */
+typedef struct { simd_float4 columns[2]; } simd_float2x4;
+
+/*! @abstract A matrix with 4 rows and 3 columns. */
+typedef struct { simd_float4 columns[3]; } simd_float3x4;
+
+/*! @abstract A matrix with 4 rows and 4 columns. */
+typedef struct { simd_float4 columns[4]; } simd_float4x4;
+
+/*! @abstract A matrix with 2 rows and 2 columns. */
+typedef struct { simd_double2 columns[2]; } simd_double2x2;
+
+/*! @abstract A matrix with 2 rows and 3 columns. */
+typedef struct { simd_double2 columns[3]; } simd_double3x2;
+
+/*! @abstract A matrix with 2 rows and 4 columns. */
+typedef struct { simd_double2 columns[4]; } simd_double4x2;
+
+/*! @abstract A matrix with 3 rows and 2 columns. */
+typedef struct { simd_double3 columns[2]; } simd_double2x3;
+
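The padding warning above is easy to observe directly; each three-lane column is stored in four lanes (a quick check, assuming `<simd/simd.h>` is available):

~~~
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
  printf("sizeof(simd_float3)   = %zu\n", sizeof(simd_float3));    // 16, not 12
  printf("sizeof(simd_float3x3) = %zu\n", sizeof(simd_float3x3));  // 48, not 36
  printf("sizeof(simd_float2x3) = %zu\n", sizeof(simd_float2x3));  // 32: two padded columns
  return 0;
}
~~~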
+/*! @abstract A matrix with 3 rows and 3 columns. */
+typedef struct { simd_double3 columns[3]; } simd_double3x3;
+
+/*! @abstract A matrix with 3 rows and 4 columns. */
+typedef struct { simd_double3 columns[4]; } simd_double4x3;
+
+/*! @abstract A matrix with 4 rows and 2 columns. */
+typedef struct { simd_double4 columns[2]; } simd_double2x4;
+
+/*! @abstract A matrix with 4 rows and 3 columns. */
+typedef struct { simd_double4 columns[3]; } simd_double3x4;
+
+/*! @abstract A matrix with 4 rows and 4 columns. */
+typedef struct { simd_double4 columns[4]; } simd_double4x4;
+
+
+/*! @group Quaternions
+ * @discussion Unlike vectors, quaternions are not raw clang extended-vector
+ * types, because if they were you'd be able to intermix them with vectors
+ * in arithmetic operations freely, but the arithmetic would not do what you
+ * want it to do (it would simply perform the arithmetic operation
+ * componentwise on the quaternion and vector).
+ *
+ * Quaternions aren't unions in C/Obj-C, because then the C++ types couldn't
+ * inherit from the C types, which would make intermixing rather painful (you
+ * can't inherit from a union). This means that we can't provide nice member
+ * access like .real and .imag; you need to use functions to access the pieces
+ * of a quaternion instead.
+ *
+ * This also means that you need to use functions instead of operators to do
+ * arithmetic with quaternions in C and Obj-C. In C++, we are able to provide
+ * operator overloads for arithmetic.
+ *
+ * Internally, a quaternion is represented as a vector of four elements. The
+ * first three elements are the "imaginary" (or "vector") part of the
+ * quaternion, and the last element is the "real" (or "scalar") part. As with
+ * everything simd, you will generally get better performance if you avoid
+ * using the internal storage details of the type, and instead treat these
+ * quaternions as abstract mathematical objects once they are created.
+ *
+ * While the C types are defined here, the operations on quaternions and the
+ * C++ quaternion types are defined in <simd/quaternion.h>. */
+
+/*! @abstract A single-precision quaternion. */
+typedef struct { simd_float4 vector; } simd_quatf;
+
+/*! @abstract A double-precision quaternion. */
+typedef struct { simd_double4 vector; } simd_quatd;
+
+#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif /* SIMD_TYPES */
diff --git a/vfsoverlay/vector.h b/vfsoverlay/vector.h
new file mode 100644
index 00000000..7ab8f2ad
--- /dev/null
+++ b/vfsoverlay/vector.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2014 Apple, Inc. All rights reserved.
+ *
+ * This header provides small vector (simd) types and basic arithmetic and
+ * math functions that operate on them.
+ *
+ * A wide assortment of vector types are provided in <simd/vector_types.h>,
+ * which is included by this header. The most important (as far as the rest
+ * of this library is concerned) are vector_floatN (where N is 2, 3, 4, 8, or
+ * 16), and vector_doubleN (where N is 2, 3, 4, or 8).
+ *
+ * All of the vector types are based on what clang calls "OpenCL vectors",
+ * defined with the __ext_vector_type__ attribute. Many C operators "just
+ * work" with these types, so it is not necessary to make function calls
+ * to do basic arithmetic:
+ *
+ *   simd_float4 x, y;
+ *   x = x + y; // vector sum of x and y.
+ *
+ * Scalar values are implicitly promoted to vectors (with a "splat"), so it
+ * is possible to easily write expressions involving scalars as well:
+ *
+ *   simd_float4 x;
+ *   x = 2*x; // scale x by 2.
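Because the C quaternion types just defined are structs wrapping a four-lane vector, components are reached through `simd_real`/`simd_imag` (or the `.vector` member) rather than `.real`/`.imag`. For instance (illustrative snippet, assuming `<simd/simd.h>`):

~~~
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
  simd_quatf q = simd_quaternion(1.0f, 2.0f, 3.0f, 4.0f);  // (ix, iy, iz, r)
  simd_float3 im = simd_imag(q);  // lanes 0..2
  float re = simd_real(q);        // lane 3; same storage as q.vector.w
  printf("imag = (%f, %f, %f), real = %f\n", im.x, im.y, im.z, re);
  return 0;
}
~~~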
+ *
+ * Besides the basic operations provided by the compiler, this header provides
+ * a set of mathematical and geometric primitives for use with these types.
+ * In C and Objective-C, these functions are prefixed with vector_; in C++,
+ * unprefixed names are available within the simd:: namespace.
+ *
+ *   simd_float3 x, y;
+ *   vector_max(x,y)     // elementwise maximum of x and y
+ *   fabs(x)             // same as vector_abs(x)
+ *   vector_clamp(x,0,1) // x clamped to the range [0,1]. This has no
+ *                       // standard-library analogue, so there is no
+ *                       // alternate name.
+ *
+ * Matrix and matrix-vector operations are also available in <simd/matrix.h>.
+ */
+
+#ifndef __SIMD_VECTOR_HEADER__
+#define __SIMD_VECTOR_HEADER__
+
+#include <simd/vector_types.h>
+#include <simd/packed.h>
+#include <simd/vector_make.h>
+#include <simd/logic.h>
+#include <simd/math.h>
+#include <simd/common.h>
+#include <simd/geometry.h>
+#include <simd/conversion.h>
+
+#endif
diff --git a/vfsoverlay/vector_make.h b/vfsoverlay/vector_make.h
new file mode 100644
index 00000000..73b95fa6
--- /dev/null
+++ b/vfsoverlay/vector_make.h
@@ -0,0 +1,7874 @@
+/*! @header
+ * This header defines functions for constructing, extending, and truncating
+ * simd vector types.
+ *
+ * For each vector type `simd_typeN` supported by <simd/vector_types.h>, the
+ * following constructors are provided:
+ *
+ * ~~~
+ * simd_typeN simd_make_typeN(type other);
+ * simd_typeN simd_make_typeN(simd_typeM other);
+ * ~~~
+ * For the scalar-input version, or if M < N, these functions zero-extend
+ * `other` to produce a wider vector. If M == N, `other` is passed through
+ * unmodified. If `M > N`, `other` is truncated to form the result.
+ *
+ * ~~~
+ * simd_typeN simd_make_typeN_undef(type other);
+ * simd_typeN simd_make_typeN_undef(simd_typeM other);
+ * ~~~
+ * These functions are only available for M < N and for scalar inputs. They
+ * extend `other` to produce a wider vector where the contents of the newly-
+ * formed lanes are undefined.
+ *
+ * In addition, if N is 2, 3, or 4, the following constructors are available:
+ * ~~~
+ * simd_make_typeN(parts ...)
+ * ~~~
+ * where parts is a list of scalars and smaller vectors such that the sum of
+ * the number of lanes in the arguments is equal to N. For example, a
+ * `simd_float3` can be constructed from three `floats`, or a `float` and a
+ * `simd_float2` in any order:
+ * ~~~
+ * simd_float2 ab = { 1, 2 };
+ * simd_float3 vector = simd_make_float3(ab, 3);
+ * ~~~
+ *
+ * In C++ the above functions are templated in the simd:: namespace.
+ *
+ *   C++ Function                    Equivalent C Function
+ *   -------------------------------------------------------------------
+ *   simd::make<typeN>(x ...)        simd_make_typeN(x ...)
+ *   simd::make_undef<typeN>(x ...)  simd_make_typeN_undef(x ...)
+ *
+ * In addition, a templated Vector struct is available for writing
+ * templated code based on the scalar type.
+ *
+ *   template <typename ScalarType, size_t count> struct simd::Vector {
+ *     // static const size_t count
+ *     // typedef scalar_t
+ *     // typedef type
+ *     // typedef packed_t
+ *   };
+ *
+ * Look up the equivalent Vector struct according to typeN:
+ *
+ *   template <typename typeN> struct simd::get_traits
+ *   {
+ *     // using type = Vector<ScalarType, count>;
+ *   };
+ *
+ * This is commonly used to get the type traits of typeN, so a helper type,
+ * namely traits, is available to query the type traits easily.
+ *
+ *   simd::traits<typeN>::count
+ *   simd::traits<typeN>::scalar_t
+ *
+ * @copyright 2014-2016 Apple, Inc. All rights reserved.
+ * @unsorted */
+
+#ifndef SIMD_VECTOR_CONSTRUCTORS
+#define SIMD_VECTOR_CONSTRUCTORS
+
+#include <simd/base.h>
+#if SIMD_COMPILER_HAS_REQUIRED_FEATURES
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*!
@abstract Concatenates `x` and `y` to form a vector of two 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(char x, char y) { + simd_char2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(char other) { + simd_char2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2_undef(char other) { + simd_char2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char16 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char32 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char2 simd_make_char2(simd_char64 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(char x, char y, char z) { + simd_char3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(char x, simd_char2 yz) { + simd_char3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char2 xy, char z) { + simd_char3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(char other) { + simd_char3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. 
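The same constructor family shown here for `char` vectors exists for every element type; in practice the float versions are the ones most code reaches for. A sketch of the concatenate / zero-extend / undef / truncate variants (assumes `<simd/simd.h>`):

~~~
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
  simd_float2 ab = { 1, 2 };
  simd_float3 v = simd_make_float3(ab, 3);    // concatenate: (1, 2, 3)
  simd_float4 z = simd_make_float4(v);        // zero-extend: (1, 2, 3, 0)
  simd_float4 u = simd_make_float4_undef(v);  // lane 3 left unspecified
  simd_float2 t = simd_make_float2(z);        // truncate: (1, 2)
  printf("z = (%f, %f, %f, %f), t = (%f, %f)\n", z.x, z.y, z.z, z.w, t.x, t.y);
  (void)u;  // never read the undefined lane
  return 0;
}
~~~

The `_undef` variants exist purely as an optimization: skipping the zero-fill lets the compiler leave the new lanes as whatever was already in the register.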
*/ +static inline SIMD_CFUNC simd_char3 simd_make_char3_undef(char other) { + simd_char3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char2 other) { + simd_char3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3_undef(simd_char2 other) { + simd_char3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char16 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char32 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char3 simd_make_char3(simd_char64 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 8-bit signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(char x, char y, char z, char w) { + simd_char4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(char x, char y, simd_char2 zw) { + simd_char4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(char x, simd_char2 yz, char w) { + simd_char4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char2 xy, char z, char w) { + simd_char4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(char x, simd_char3 yzw) { + simd_char4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 8-bit + * signed (twos-complement) integers. 
*/ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char2 xy, simd_char2 zw) { + simd_char4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char3 xyz, char w) { + simd_char4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(char other) { + simd_char4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4_undef(char other) { + simd_char4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char2 other) { + simd_char4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4_undef(simd_char2 other) { + simd_char4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char3 other) { + simd_char4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4_undef(simd_char3 other) { + simd_char4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char16 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char32 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_char4 simd_make_char4(simd_char64 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char4 lo, simd_char4 hi) { + simd_char8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. 
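Wider vectors are assembled from, and split back into, halves; the `.lo`/`.hi` lane groups used by these constructors are ordinary clang extended-vector accessors. For example (illustrative snippet, assuming `<simd/simd.h>`):

~~~
#include <simd/simd.h>
#include <stdio.h>

int main(void) {
  simd_char8  lo = { 0, 1, 2, 3, 4, 5, 6, 7 };
  simd_char8  hi = { 8, 9, 10, 11, 12, 13, 14, 15 };
  simd_char16 v = simd_make_char16(lo, hi);  // concatenate the halves
  simd_char8  back = simd_make_char8(v);     // truncation keeps v.lo
  printf("v.x=%d v.hi.x=%d back.y=%d\n", v.x, v.hi.x, back.y);  // 0 8 1
  return 0;
}
~~~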
*/ +static inline SIMD_CFUNC simd_char8 simd_make_char8(char other) { + simd_char8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8_undef(char other) { + simd_char8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char2 other) { + simd_char8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8_undef(simd_char2 other) { + simd_char8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char3 other) { + simd_char8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8_undef(simd_char3 other) { + simd_char8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char4 other) { + simd_char8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8_undef(simd_char4 other) { + simd_char8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char16 other) { + return simd_make_char8(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char32 other) { + return simd_make_char8(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of eight 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char8 simd_make_char8(simd_char64 other) { + return simd_make_char8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char8 lo, simd_char8 hi) { + simd_char16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. 
*/ +static inline SIMD_CFUNC simd_char16 simd_make_char16(char other) { + simd_char16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16_undef(char other) { + simd_char16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char2 other) { + simd_char16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16_undef(simd_char2 other) { + simd_char16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char3 other) { + simd_char16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16_undef(simd_char3 other) { + simd_char16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char4 other) { + simd_char16 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16_undef(simd_char4 other) { + simd_char16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char8 other) { + simd_char16 result = 0; + result.lo = simd_make_char8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16_undef(simd_char8 other) { + simd_char16 result; + result.lo = simd_make_char8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char16 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char32 other) { + return simd_make_char16(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of sixteen 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char16 simd_make_char16(simd_char64 other) { + return simd_make_char16(other.lo); +} + +/*! 
@abstract Concatenates `lo` and `hi` to form a vector of thirty-two + * 8-bit signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char16 lo, simd_char16 hi) { + simd_char32 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(char other) { + simd_char32 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32_undef(char other) { + simd_char32 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char2 other) { + simd_char32 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32_undef(simd_char2 other) { + simd_char32 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char3 other) { + simd_char32 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32_undef(simd_char3 other) { + simd_char32 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char4 other) { + simd_char32 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32_undef(simd_char4 other) { + simd_char32 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char8 other) { + simd_char32 result = 0; + result.lo = simd_make_char16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32_undef(simd_char8 other) { + simd_char32 result; + result.lo = simd_make_char16(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char16 other) { + simd_char32 result = 0; + result.lo = simd_make_char16(other); + return result; +} + +/*! 
@abstract Extends `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32_undef(simd_char16 other) { + simd_char32 result; + result.lo = simd_make_char16(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char32 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of thirty-two 8-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char32 simd_make_char32(simd_char64 other) { + return simd_make_char32(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixty-four + * 8-bit signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char32 lo, simd_char32 hi) { + simd_char64 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(char other) { + simd_char64 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(char other) { + simd_char64 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char2 other) { + simd_char64 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(simd_char2 other) { + simd_char64 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char3 other) { + simd_char64 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(simd_char3 other) { + simd_char64 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char4 other) { + simd_char64 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(simd_char4 other) { + simd_char64 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. 
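+ * (A composition example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * the widest vectors are always two halves glued together. */
+static inline simd_char64 example_concat_char32(simd_char32 lo, simd_char32 hi) {
+  return simd_make_char64(lo, hi);  /* lo -> lanes 0..31, hi -> lanes 32..63 */
+}
+
+/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit
+ * signed (twos-complement) integers.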
*/ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char8 other) { + simd_char64 result = 0; + result.lo = simd_make_char32(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(simd_char8 other) { + simd_char64 result; + result.lo = simd_make_char32(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char16 other) { + simd_char64 result = 0; + result.lo = simd_make_char32(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(simd_char16 other) { + simd_char64 result; + result.lo = simd_make_char32(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char32 other) { + simd_char64 result = 0; + result.lo = simd_make_char32(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64_undef(simd_char32 other) { + simd_char64 result; + result.lo = simd_make_char32(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_char64 simd_make_char64(simd_char64 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(unsigned char x, unsigned char y) { + simd_uchar2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(unsigned char other) { + simd_uchar2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2_undef(unsigned char other) { + simd_uchar2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned + * integers. 
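+ * (A truncation example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * truncation keeps the lowest-indexed lanes of the source vector. */
+static inline simd_uchar2 example_first_two(simd_uchar4 rgba) {
+  return simd_make_uchar2(rgba);  /* equivalent to rgba.xy */
+}
+
+/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned
+ * integers.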
*/ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar16 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar32 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar2 simd_make_uchar2(simd_uchar64 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(unsigned char x, unsigned char y, unsigned char z) { + simd_uchar3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(unsigned char x, simd_uchar2 yz) { + simd_uchar3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar2 xy, unsigned char z) { + simd_uchar3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(unsigned char other) { + simd_uchar3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3_undef(unsigned char other) { + simd_uchar3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar2 other) { + simd_uchar3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3_undef(simd_uchar2 other) { + simd_uchar3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar16 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit unsigned + * integers. 
*/ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar32 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar3 simd_make_uchar3(simd_uchar64 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 8-bit unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w) { + simd_uchar4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(unsigned char x, unsigned char y, simd_uchar2 zw) { + simd_uchar4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(unsigned char x, simd_uchar2 yz, unsigned char w) { + simd_uchar4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar2 xy, unsigned char z, unsigned char w) { + simd_uchar4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(unsigned char x, simd_uchar3 yzw) { + simd_uchar4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar2 xy, simd_uchar2 zw) { + simd_uchar4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar3 xyz, unsigned char w) { + simd_uchar4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(unsigned char other) { + simd_uchar4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4_undef(unsigned char other) { + simd_uchar4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar2 other) { + simd_uchar4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4_undef(simd_uchar2 other) { + simd_uchar4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 8-bit unsigned + * integers. 
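+ * (An assembly example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * the four-lane constructors accept any mix of scalars and sub-vectors that
+ * totals four lanes, so a pixel can be assembled either way. */
+static inline simd_uchar4 example_rgb_plus_alpha(simd_uchar3 rgb, unsigned char a) {
+  return simd_make_uchar4(rgb, a);   /* rgb -> lanes 0..2, a -> lane 3 */
+}
+static inline simd_uchar4 example_from_pairs(simd_uchar2 rg, simd_uchar2 ba) {
+  return simd_make_uchar4(rg, ba);   /* rg -> lanes 0..1, ba -> lanes 2..3 */
+}
+
+/*! @abstract Zero-extends `other` to form a vector of four 8-bit unsigned
+ * integers.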
*/ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar3 other) { + simd_uchar4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4_undef(simd_uchar3 other) { + simd_uchar4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar16 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar32 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar4 simd_make_uchar4(simd_uchar64 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar4 lo, simd_uchar4 hi) { + simd_uchar8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(unsigned char other) { + simd_uchar8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8_undef(unsigned char other) { + simd_uchar8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar2 other) { + simd_uchar8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8_undef(simd_uchar2 other) { + simd_uchar8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar3 other) { + simd_uchar8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8_undef(simd_uchar3 other) { + simd_uchar8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 8-bit unsigned + * integers. 
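+ * (A zero-extension example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * passing a lone scalar places it in lane 0 and zeroes the rest. */
+static inline simd_uchar8 example_lane0(unsigned char value) {
+  return simd_make_uchar8(value);  /* lane 0 = value, lanes 1..7 = 0 */
+}
+
+/*! @abstract Zero-extends `other` to form a vector of eight 8-bit unsigned
+ * integers.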
*/ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar4 other) { + simd_uchar8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8_undef(simd_uchar4 other) { + simd_uchar8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar16 other) { + return simd_make_uchar8(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of eight 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar32 other) { + return simd_make_uchar8(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of eight 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar8 simd_make_uchar8(simd_uchar64 other) { + return simd_make_uchar8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar8 lo, simd_uchar8 hi) { + simd_uchar16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(unsigned char other) { + simd_uchar16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16_undef(unsigned char other) { + simd_uchar16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar2 other) { + simd_uchar16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16_undef(simd_uchar2 other) { + simd_uchar16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar3 other) { + simd_uchar16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16_undef(simd_uchar3 other) { + simd_uchar16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar4 other) { + simd_uchar16 result = 0; + result.xyzw = other; + return result; +} + +/*! 
@abstract Extends `other` to form a vector of sixteen 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16_undef(simd_uchar4 other) { + simd_uchar16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar8 other) { + simd_uchar16 result = 0; + result.lo = simd_make_uchar8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16_undef(simd_uchar8 other) { + simd_uchar16 result; + result.lo = simd_make_uchar8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar16 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of sixteen 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar32 other) { + return simd_make_uchar16(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of sixteen 8-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uchar16 simd_make_uchar16(simd_uchar64 other) { + return simd_make_uchar16(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two + * 8-bit unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar16 lo, simd_uchar16 hi) { + simd_uchar32 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(unsigned char other) { + simd_uchar32 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32_undef(unsigned char other) { + simd_uchar32 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar2 other) { + simd_uchar32 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32_undef(simd_uchar2 other) { + simd_uchar32 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar3 other) { + simd_uchar32 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32_undef(simd_uchar3 other) { + simd_uchar32 result; + result.xyz = other; + return result; +} + +/*! 
@abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar4 other) { + simd_uchar32 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32_undef(simd_uchar4 other) { + simd_uchar32 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar8 other) { + simd_uchar32 result = 0; + result.lo = simd_make_uchar16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32_undef(simd_uchar8 other) { + simd_uchar32 result; + result.lo = simd_make_uchar16(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar16 other) { + simd_uchar32 result = 0; + result.lo = simd_make_uchar16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32_undef(simd_uchar16 other) { + simd_uchar32 result; + result.lo = simd_make_uchar16(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar32 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of thirty-two 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar32 simd_make_uchar32(simd_uchar64 other) { + return simd_make_uchar32(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixty-four + * 8-bit unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar32 lo, simd_uchar32 hi) { + simd_uchar64 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(unsigned char other) { + simd_uchar64 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(unsigned char other) { + simd_uchar64 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar2 other) { + simd_uchar64 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. 
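+ * (An example of safe _undef use follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * the _undef constructors skip the zero-fill, so they are only safe when
+ * every remaining lane is written before the vector is read. */
+static inline simd_uchar4 example_reverse(simd_uchar4 v) {
+  simd_uchar4 r = simd_make_uchar4_undef(v.w);  /* lane 0 = v.w; lanes 1..3 unspecified */
+  r.y = v.z;
+  r.z = v.y;
+  r.w = v.x;                                    /* every lane now written */
+  return r;
+}
+
+/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified.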
*/ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(simd_uchar2 other) { + simd_uchar64 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar3 other) { + simd_uchar64 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(simd_uchar3 other) { + simd_uchar64 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar4 other) { + simd_uchar64 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(simd_uchar4 other) { + simd_uchar64 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar8 other) { + simd_uchar64 result = 0; + result.lo = simd_make_uchar32(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(simd_uchar8 other) { + simd_uchar64 result; + result.lo = simd_make_uchar32(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar16 other) { + simd_uchar64 result = 0; + result.lo = simd_make_uchar32(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(simd_uchar16 other) { + simd_uchar64 result; + result.lo = simd_make_uchar32(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixty-four 8-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar32 other) { + simd_uchar64 result = 0; + result.lo = simd_make_uchar32(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64_undef(simd_uchar32 other) { + simd_uchar64 result; + result.lo = simd_make_uchar32(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uchar64 simd_make_uchar64(simd_uchar64 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 16-bit signed + * (twos-complement) integers. 
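+ * (A widening example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * zero-extension works between any two widths of the same element type. */
+static inline simd_uchar64 example_widen(simd_uchar32 v) {
+  return simd_make_uchar64(v);  /* lanes 32..63 are zeroed */
+}
+
+/*! @abstract Concatenates `x` and `y` to form a vector of two 16-bit signed
+ * (twos-complement) integers.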
*/ +static inline SIMD_CFUNC simd_short2 simd_make_short2(short x, short y) { + simd_short2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(short other) { + simd_short2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2_undef(short other) { + simd_short2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(simd_short2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(simd_short3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(simd_short4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(simd_short8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(simd_short16 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_short2 simd_make_short2(simd_short32 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(short x, short y, short z) { + simd_short3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(short x, simd_short2 yz) { + simd_short3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short2 xy, short z) { + simd_short3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(short other) { + simd_short3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3_undef(short other) { + simd_short3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 16-bit signed + * (twos-complement) integers. 
*/ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short2 other) { + simd_short3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3_undef(simd_short2 other) { + simd_short3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short16 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short3 simd_make_short3(simd_short32 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 16-bit signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(short x, short y, short z, short w) { + simd_short4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(short x, short y, simd_short2 zw) { + simd_short4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(short x, simd_short2 yz, short w) { + simd_short4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short2 xy, short z, short w) { + simd_short4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(short x, simd_short3 yzw) { + simd_short4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short2 xy, simd_short2 zw) { + simd_short4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 16-bit + * signed (twos-complement) integers. 
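+ * (A pairwise-assembly example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * a four-lane vector can be built from two two-lane halves. */
+static inline simd_short4 example_from_short2s(simd_short2 xy, simd_short2 zw) {
+  return simd_make_short4(xy, zw);  /* xy -> lanes 0..1, zw -> lanes 2..3 */
+}
+
+/*! @abstract Concatenates `xyz` and `w` to form a vector of four 16-bit
+ * signed (twos-complement) integers.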
*/ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short3 xyz, short w) { + simd_short4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(short other) { + simd_short4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4_undef(short other) { + simd_short4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short2 other) { + simd_short4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4_undef(simd_short2 other) { + simd_short4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short3 other) { + simd_short4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4_undef(simd_short3 other) { + simd_short4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short16 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short4 simd_make_short4(simd_short32 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short4 lo, simd_short4 hi) { + simd_short8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(short other) { + simd_short8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. 
*/ +static inline SIMD_CFUNC simd_short8 simd_make_short8_undef(short other) { + simd_short8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short2 other) { + simd_short8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8_undef(simd_short2 other) { + simd_short8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short3 other) { + simd_short8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8_undef(simd_short3 other) { + simd_short8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short4 other) { + simd_short8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8_undef(simd_short4 other) { + simd_short8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short16 other) { + return simd_make_short8(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of eight 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short8 simd_make_short8(simd_short32 other) { + return simd_make_short8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short8 lo, simd_short8 hi) { + simd_short16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(short other) { + simd_short16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16_undef(short other) { + simd_short16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. 
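+ * (A narrowing example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * narrowing a 16-lane vector recurses through its `lo` half. */
+static inline simd_short8 example_low_short8(simd_short16 v) {
+  return simd_make_short8(v);  /* keeps lanes 0..7 */
+}
+
+/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit signed
+ * (twos-complement) integers.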
*/ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short2 other) { + simd_short16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16_undef(simd_short2 other) { + simd_short16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short3 other) { + simd_short16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16_undef(simd_short3 other) { + simd_short16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short4 other) { + simd_short16 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16_undef(simd_short4 other) { + simd_short16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short8 other) { + simd_short16 result = 0; + result.lo = simd_make_short8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16_undef(simd_short8 other) { + simd_short16 result; + result.lo = simd_make_short8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short16 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of sixteen 16-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short16 simd_make_short16(simd_short32 other) { + return simd_make_short16(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two + * 16-bit signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short16 lo, simd_short16 hi) { + simd_short32 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(short other) { + simd_short32 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. 
*/ +static inline SIMD_CFUNC simd_short32 simd_make_short32_undef(short other) { + simd_short32 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short2 other) { + simd_short32 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32_undef(simd_short2 other) { + simd_short32 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short3 other) { + simd_short32 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32_undef(simd_short3 other) { + simd_short32 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short4 other) { + simd_short32 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32_undef(simd_short4 other) { + simd_short32 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short8 other) { + simd_short32 result = 0; + result.lo = simd_make_short16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32_undef(simd_short8 other) { + simd_short32 result; + result.lo = simd_make_short16(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short16 other) { + simd_short32 result = 0; + result.lo = simd_make_short16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32_undef(simd_short16 other) { + simd_short32 result; + result.lo = simd_make_short16(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_short32 simd_make_short32(simd_short32 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 16-bit + * unsigned integers. 
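+ * (A final widening example follows.) */
+
+/* Usage sketch (illustrative assumption, not part of the generated header):
+ * the same widen/narrow pattern repeats for every element type, here for
+ * 16-bit lanes. */
+static inline simd_short32 example_widen_short16(simd_short16 v) {
+  return simd_make_short32(v);  /* lanes 16..31 are zeroed */
+}
+
+/*! @abstract Concatenates `x` and `y` to form a vector of two 16-bit
+ * unsigned integers.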
*/ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(unsigned short x, unsigned short y) { + simd_ushort2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(unsigned short other) { + simd_ushort2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2_undef(unsigned short other) { + simd_ushort2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(simd_ushort2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(simd_ushort3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(simd_ushort4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(simd_ushort8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(simd_ushort16 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort2 simd_make_ushort2(simd_ushort32 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(unsigned short x, unsigned short y, unsigned short z) { + simd_ushort3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(unsigned short x, simd_ushort2 yz) { + simd_ushort3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort2 xy, unsigned short z) { + simd_ushort3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(unsigned short other) { + simd_ushort3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3_undef(unsigned short other) { + simd_ushort3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 16-bit unsigned + * integers. 
*/ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort2 other) { + simd_ushort3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3_undef(simd_ushort2 other) { + simd_ushort3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort16 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort3 simd_make_ushort3(simd_ushort32 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 16-bit unsigned integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) { + simd_ushort4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(unsigned short x, unsigned short y, simd_ushort2 zw) { + simd_ushort4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(unsigned short x, simd_ushort2 yz, unsigned short w) { + simd_ushort4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort2 xy, unsigned short z, unsigned short w) { + simd_ushort4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(unsigned short x, simd_ushort3 yzw) { + simd_ushort4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort2 xy, simd_ushort2 zw) { + simd_ushort4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 16-bit + * unsigned integers. 
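+ * (Editorial illustration: the smaller constructors compose, so + * simd_make_ushort4(simd_make_ushort3(1, 2, 3), 4) builds {1, 2, 3, 4}.)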
*/ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort3 xyz, unsigned short w) { + simd_ushort4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(unsigned short other) { + simd_ushort4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4_undef(unsigned short other) { + simd_ushort4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort2 other) { + simd_ushort4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4_undef(simd_ushort2 other) { + simd_ushort4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort3 other) { + simd_ushort4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4_undef(simd_ushort3 other) { + simd_ushort4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort16 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort4 simd_make_ushort4(simd_ushort32 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort4 lo, simd_ushort4 hi) { + simd_ushort8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(unsigned short other) { + simd_ushort8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8_undef(unsigned short other) { + simd_ushort8 result; + result.x = other; + return result; +} + +/*! 
@abstract Zero-extends `other` to form a vector of eight 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort2 other) { + simd_ushort8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8_undef(simd_ushort2 other) { + simd_ushort8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort3 other) { + simd_ushort8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8_undef(simd_ushort3 other) { + simd_ushort8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort4 other) { + simd_ushort8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8_undef(simd_ushort4 other) { + simd_ushort8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort16 other) { + return simd_make_ushort8(other.lo); +} + +/*! @abstract Truncates `other` to form a vector of eight 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort8 simd_make_ushort8(simd_ushort32 other) { + return simd_make_ushort8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort8 lo, simd_ushort8 hi) { + simd_ushort16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(unsigned short other) { + simd_ushort16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16_undef(unsigned short other) { + simd_ushort16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort2 other) { + simd_ushort16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit unsigned + * integers. 
The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16_undef(simd_ushort2 other) { + simd_ushort16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort3 other) { + simd_ushort16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16_undef(simd_ushort3 other) { + simd_ushort16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort4 other) { + simd_ushort16 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16_undef(simd_ushort4 other) { + simd_ushort16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort8 other) { + simd_ushort16 result = 0; + result.lo = simd_make_ushort8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16_undef(simd_ushort8 other) { + simd_ushort16 result; + result.lo = simd_make_ushort8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort16 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of sixteen 16-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ushort16 simd_make_ushort16(simd_ushort32 other) { + return simd_make_ushort16(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two + * 16-bit unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort16 lo, simd_ushort16 hi) { + simd_ushort32 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(unsigned short other) { + simd_ushort32 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32_undef(unsigned short other) { + simd_ushort32 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort2 other) { + simd_ushort32 result = 0; + result.xy = other; + return result; +} + +/*! 
@abstract Extends `other` to form a vector of thirty-two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32_undef(simd_ushort2 other) { + simd_ushort32 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort3 other) { + simd_ushort32 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32_undef(simd_ushort3 other) { + simd_ushort32 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort4 other) { + simd_ushort32 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32_undef(simd_ushort4 other) { + simd_ushort32 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort8 other) { + simd_ushort32 result = 0; + result.lo = simd_make_ushort16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32_undef(simd_ushort8 other) { + simd_ushort32 result; + result.lo = simd_make_ushort16(other); + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of thirty-two 16-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort16 other) { + simd_ushort32 result = 0; + result.lo = simd_make_ushort16(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of thirty-two 16-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32_undef(simd_ushort16 other) { + simd_ushort32 result; + result.lo = simd_make_ushort16(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ushort32 simd_make_ushort32(simd_ushort32 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(int x, int y) { + simd_int2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(int other) { + simd_int2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 32-bit signed (twos- + * complement) integers. 
The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2_undef(int other) { + simd_int2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(simd_int2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(simd_int3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(simd_int4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(simd_int8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_int2 simd_make_int2(simd_int16 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(int x, int y, int z) { + simd_int3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(int x, simd_int2 yz) { + simd_int3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(simd_int2 xy, int z) { + simd_int3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(int other) { + simd_int3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3_undef(int other) { + simd_int3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(simd_int2 other) { + simd_int3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3_undef(simd_int2 other) { + simd_int3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(simd_int3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit signed + * (twos-complement) integers. 
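+ * (Editorial illustration: truncation keeps the low lanes, so an input of + * {1, 2, 3, 4} yields {1, 2, 3} via the `.xyz` swizzle.)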
*/ +static inline SIMD_CFUNC simd_int3 simd_make_int3(simd_int4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(simd_int8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int3 simd_make_int3(simd_int16 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 32-bit signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(int x, int y, int z, int w) { + simd_int4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(int x, int y, simd_int2 zw) { + simd_int4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(int x, simd_int2 yz, int w) { + simd_int4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int2 xy, int z, int w) { + simd_int4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(int x, simd_int3 yzw) { + simd_int4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int2 xy, simd_int2 zw) { + simd_int4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int3 xyz, int w) { + simd_int4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(int other) { + simd_int4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4_undef(int other) { + simd_int4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int2 other) { + simd_int4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. 
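+ * (Editorial note: a common pattern is to build with the `_undef` form and + * then assign the remaining lanes explicitly, e.g. + * simd_int4 v = simd_make_int4_undef(xy); v.zw = zw; where `xy` and `zw` + * are hypothetical simd_int2 values.)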
*/ +static inline SIMD_CFUNC simd_int4 simd_make_int4_undef(simd_int2 other) { + simd_int4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int3 other) { + simd_int4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4_undef(simd_int3 other) { + simd_int4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int4 simd_make_int4(simd_int16 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(simd_int4 lo, simd_int4 hi) { + simd_int8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(int other) { + simd_int8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8_undef(int other) { + simd_int8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(simd_int2 other) { + simd_int8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8_undef(simd_int2 other) { + simd_int8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(simd_int3 other) { + simd_int8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8_undef(simd_int3 other) { + simd_int8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(simd_int4 other) { + simd_int8 result = 0; + result.xyzw = other; + return result; +} + +/*! 
@abstract Extends `other` to form a vector of eight 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8_undef(simd_int4 other) { + simd_int8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(simd_int8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int8 simd_make_int8(simd_int16 other) { + return simd_make_int8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16(simd_int8 lo, simd_int8 hi) { + simd_int16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16(int other) { + simd_int16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16_undef(int other) { + simd_int16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16(simd_int2 other) { + simd_int16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16_undef(simd_int2 other) { + simd_int16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16(simd_int3 other) { + simd_int16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16_undef(simd_int3 other) { + simd_int16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16(simd_int4 other) { + simd_int16 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16_undef(simd_int4 other) { + simd_int16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. 
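+ * (Editorial note: for the wide vectors the input lands in the `lo` half, + * and the zero-initialization leaves the `hi` half all zeros.)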
*/ +static inline SIMD_CFUNC simd_int16 simd_make_int16(simd_int8 other) { + simd_int16 result = 0; + result.lo = simd_make_int8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16_undef(simd_int8 other) { + simd_int16 result; + result.lo = simd_make_int8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_int16 simd_make_int16(simd_int16 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(unsigned int x, unsigned int y) { + simd_uint2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(unsigned int other) { + simd_uint2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2_undef(unsigned int other) { + simd_uint2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(simd_uint2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(simd_uint3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(simd_uint4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(simd_uint8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint2 simd_make_uint2(simd_uint16 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(unsigned int x, unsigned int y, unsigned int z) { + simd_uint3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(unsigned int x, simd_uint2 yz) { + simd_uint3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(simd_uint2 xy, unsigned int z) { + simd_uint3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(unsigned int other) { + simd_uint3 result = 0; + result.x = other; + return result; +} + +/*! 
@abstract Extends `other` to form a vector of three 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3_undef(unsigned int other) { + simd_uint3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(simd_uint2 other) { + simd_uint3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3_undef(simd_uint2 other) { + simd_uint3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(simd_uint3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(simd_uint4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(simd_uint8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint3 simd_make_uint3(simd_uint16 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 32-bit unsigned integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w) { + simd_uint4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(unsigned int x, unsigned int y, simd_uint2 zw) { + simd_uint4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(unsigned int x, simd_uint2 yz, unsigned int w) { + simd_uint4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint2 xy, unsigned int z, unsigned int w) { + simd_uint4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(unsigned int x, simd_uint3 yzw) { + simd_uint4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint2 xy, simd_uint2 zw) { + simd_uint4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 32-bit + * unsigned integers. 
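+ * (Editorial illustration: with `xyz` = {1, 2, 3} and `w` = 4 the result + * is {1, 2, 3, 4}; the overloads above build the same vector from other + * splits, such as two simd_uint2 halves.)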
*/ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint3 xyz, unsigned int w) { + simd_uint4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(unsigned int other) { + simd_uint4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4_undef(unsigned int other) { + simd_uint4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint2 other) { + simd_uint4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4_undef(simd_uint2 other) { + simd_uint4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint3 other) { + simd_uint4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4_undef(simd_uint3 other) { + simd_uint4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint4 simd_make_uint4(simd_uint16 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(simd_uint4 lo, simd_uint4 hi) { + simd_uint8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(unsigned int other) { + simd_uint8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8_undef(unsigned int other) { + simd_uint8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(simd_uint2 other) { + simd_uint8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit unsigned + * integers. 
The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8_undef(simd_uint2 other) { + simd_uint8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(simd_uint3 other) { + simd_uint8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8_undef(simd_uint3 other) { + simd_uint8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(simd_uint4 other) { + simd_uint8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8_undef(simd_uint4 other) { + simd_uint8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(simd_uint8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 32-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_uint8 simd_make_uint8(simd_uint16 other) { + return simd_make_uint8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(simd_uint8 lo, simd_uint8 hi) { + simd_uint16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(unsigned int other) { + simd_uint16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16_undef(unsigned int other) { + simd_uint16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(simd_uint2 other) { + simd_uint16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16_undef(simd_uint2 other) { + simd_uint16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(simd_uint3 other) { + simd_uint16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. 
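+ * (Editorial note: the `_undef` variants skip the zero-fill of the lanes + * beyond the input, which may save a little work when every lane will be + * written before it is read.)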
*/ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16_undef(simd_uint3 other) { + simd_uint16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(simd_uint4 other) { + simd_uint16 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16_undef(simd_uint4 other) { + simd_uint16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(simd_uint8 other) { + simd_uint16 result = 0; + result.lo = simd_make_uint8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16_undef(simd_uint8 other) { + simd_uint16 result; + result.lo = simd_make_uint8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_uint16 simd_make_uint16(simd_uint16 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(float x, float y) { + simd_float2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(float other) { + simd_float2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 32-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2_undef(float other) { + simd_float2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(simd_float2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(simd_float3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(simd_float4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(simd_float8 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float2 simd_make_float2(simd_float16 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 32-bit + * floating-point numbers. 
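+ * (Editorial illustration: simd_make_float3(1.0f, 2.0f, 3.0f) produces + * {1.0, 2.0, 3.0}; note that simd_float3, like the other three-lane simd + * types, is padded to the size of the corresponding four-lane vector.)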
*/ +static inline SIMD_CFUNC simd_float3 simd_make_float3(float x, float y, float z) { + simd_float3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(float x, simd_float2 yz) { + simd_float3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(simd_float2 xy, float z) { + simd_float3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(float other) { + simd_float3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3_undef(float other) { + simd_float3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(simd_float2 other) { + simd_float3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3_undef(simd_float2 other) { + simd_float3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(simd_float3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(simd_float4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(simd_float8 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float3 simd_make_float3(simd_float16 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 32-bit floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(float x, float y, float z, float w) { + simd_float4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(float x, float y, simd_float2 zw) { + simd_float4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(float x, simd_float2 yz, float w) { + simd_float4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! 
@abstract Concatenates `xy`, `z` and `w` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float2 xy, float z, float w) { + simd_float4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(float x, simd_float3 yzw) { + simd_float4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float2 xy, simd_float2 zw) { + simd_float4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float3 xyz, float w) { + simd_float4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(float other) { + simd_float4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4_undef(float other) { + simd_float4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float2 other) { + simd_float4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4_undef(simd_float2 other) { + simd_float4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float3 other) { + simd_float4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 32-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4_undef(simd_float3 other) { + simd_float4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float8 other) { + return other.xyzw; +} + +/*! @abstract Truncates `other` to form a vector of four 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float4 simd_make_float4(simd_float16 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 32-bit + * floating-point numbers. 
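+ * (Editorial illustration: simd_make_float8(simd_make_float4(0.0f), + * simd_make_float4(1.0f)) yields low lanes {0, 0, 0, 0} and high lanes + * {1, 0, 0, 0}, since the single-argument simd_make_float4 zero-extends + * its scalar.)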
*/ +static inline SIMD_CFUNC simd_float8 simd_make_float8(simd_float4 lo, simd_float4 hi) { + simd_float8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8(float other) { + simd_float8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8_undef(float other) { + simd_float8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8(simd_float2 other) { + simd_float8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8_undef(simd_float2 other) { + simd_float8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8(simd_float3 other) { + simd_float8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8_undef(simd_float3 other) { + simd_float8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8(simd_float4 other) { + simd_float8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8_undef(simd_float4 other) { + simd_float8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8(simd_float8 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of eight 32-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_float8 simd_make_float8(simd_float16 other) { + return simd_make_float8(other.lo); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(simd_float8 lo, simd_float8 hi) { + simd_float16 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(float other) { + simd_float16 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. 
*/ +static inline SIMD_CFUNC simd_float16 simd_make_float16_undef(float other) { + simd_float16 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(simd_float2 other) { + simd_float16 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16_undef(simd_float2 other) { + simd_float16 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(simd_float3 other) { + simd_float16 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16_undef(simd_float3 other) { + simd_float16 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(simd_float4 other) { + simd_float16 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16_undef(simd_float4 other) { + simd_float16 result; + result.xyzw = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(simd_float8 other) { + simd_float16 result = 0; + result.lo = simd_make_float8(other); + return result; +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16_undef(simd_float8 other) { + simd_float16 result; + result.lo = simd_make_float8(other); + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_float16 simd_make_float16(simd_float16 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2(simd_long1 x, simd_long1 y) { + simd_long2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2(simd_long1 other) { + simd_long2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2_undef(simd_long1 other) { + simd_long2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. 
This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2(simd_long2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2(simd_long3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2(simd_long4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit signed (twos- + * complement) integers. */ +static inline SIMD_CFUNC simd_long2 simd_make_long2(simd_long8 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long1 x, simd_long1 y, simd_long1 z) { + simd_long3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long1 x, simd_long2 yz) { + simd_long3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long2 xy, simd_long1 z) { + simd_long3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long1 other) { + simd_long3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3_undef(simd_long1 other) { + simd_long3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long2 other) { + simd_long3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3_undef(simd_long2 other) { + simd_long3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long3 simd_make_long3(simd_long8 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 64-bit signed (twos-complement) integers. 
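+ *
+ * For example (illustrative only; `v` and `lo` are arbitrary names):
+ *
+ *   simd_long4 v  = simd_make_long4(1, 2, 3, 4);  // {1, 2, 3, 4}
+ *   simd_long2 lo = simd_make_long2(v);           // truncates to {1, 2}
+ *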
*/ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long1 x, simd_long1 y, simd_long1 z, simd_long1 w) { + simd_long4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long1 x, simd_long1 y, simd_long2 zw) { + simd_long4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long1 x, simd_long2 yz, simd_long1 w) { + simd_long4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long2 xy, simd_long1 z, simd_long1 w) { + simd_long4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long1 x, simd_long3 yzw) { + simd_long4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long2 xy, simd_long2 zw) { + simd_long4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long3 xyz, simd_long1 w) { + simd_long4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long1 other) { + simd_long4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4_undef(simd_long1 other) { + simd_long4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long2 other) { + simd_long4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4_undef(simd_long2 other) { + simd_long4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long3 other) { + simd_long4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. 
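+ *
+ * A sketch of the two widening forms (illustrative only):
+ *
+ *   simd_long3 xyz = simd_make_long3(1, 2, 3);
+ *   simd_long4 a = simd_make_long4(xyz);        // {1, 2, 3, 0}
+ *   simd_long4 b = simd_make_long4_undef(xyz);  // {1, 2, 3, ?}, last lane unspecified
+ *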
*/ +static inline SIMD_CFUNC simd_long4 simd_make_long4_undef(simd_long3 other) { + simd_long4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long4 simd_make_long4(simd_long8 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8(simd_long4 lo, simd_long4 hi) { + simd_long8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8(simd_long1 other) { + simd_long8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8_undef(simd_long1 other) { + simd_long8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8(simd_long2 other) { + simd_long8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8_undef(simd_long2 other) { + simd_long8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8(simd_long3 other) { + simd_long8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8_undef(simd_long3 other) { + simd_long8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8(simd_long4 other) { + simd_long8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8_undef(simd_long4 other) { + simd_long8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_long8 simd_make_long8(simd_long8 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 64-bit + * unsigned integers. 
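+ *
+ * For example (illustrative only):
+ *
+ *   simd_ulong2 v = simd_make_ulong2(1, 2);  // {1, 2}
+ *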
*/ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2(simd_ulong1 x, simd_ulong1 y) { + simd_ulong2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2(simd_ulong1 other) { + simd_ulong2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2_undef(simd_ulong1 other) { + simd_ulong2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2(simd_ulong2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2(simd_ulong3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2(simd_ulong4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong2 simd_make_ulong2(simd_ulong8 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong1 x, simd_ulong1 y, simd_ulong1 z) { + simd_ulong3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong1 x, simd_ulong2 yz) { + simd_ulong3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong2 xy, simd_ulong1 z) { + simd_ulong3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong1 other) { + simd_ulong3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3_undef(simd_ulong1 other) { + simd_ulong3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong2 other) { + simd_ulong3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3_undef(simd_ulong2 other) { + simd_ulong3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. 
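+ *
+ * The pass-through form lets generic code call the constructor uniformly
+ * regardless of the argument width (illustrative only):
+ *
+ *   simd_ulong3 v = simd_make_ulong3(1, 2, 3);
+ *   simd_ulong3 w = simd_make_ulong3(v);  // returns v unchanged
+ *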
*/ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong3 simd_make_ulong3(simd_ulong8 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 64-bit unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong1 x, simd_ulong1 y, simd_ulong1 z, simd_ulong1 w) { + simd_ulong4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong1 x, simd_ulong1 y, simd_ulong2 zw) { + simd_ulong4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong1 x, simd_ulong2 yz, simd_ulong1 w) { + simd_ulong4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong2 xy, simd_ulong1 z, simd_ulong1 w) { + simd_ulong4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong1 x, simd_ulong3 yzw) { + simd_ulong4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong2 xy, simd_ulong2 zw) { + simd_ulong4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong3 xyz, simd_ulong1 w) { + simd_ulong4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong1 other) { + simd_ulong4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4_undef(simd_ulong1 other) { + simd_ulong4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong2 other) { + simd_ulong4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. 
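+ *
+ * A sketch (illustrative only; `xy`, `a`, `b` are arbitrary names):
+ *
+ *   simd_ulong2 xy = simd_make_ulong2(1, 2);
+ *   simd_ulong4 a = simd_make_ulong4(xy);        // {1, 2, 0, 0}
+ *   simd_ulong4 b = simd_make_ulong4_undef(xy);  // upper two lanes unspecified
+ *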
*/ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4_undef(simd_ulong2 other) { + simd_ulong4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong3 other) { + simd_ulong4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4_undef(simd_ulong3 other) { + simd_ulong4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong4 simd_make_ulong4(simd_ulong8 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 64-bit + * unsigned integers. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8(simd_ulong4 lo, simd_ulong4 hi) { + simd_ulong8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8(simd_ulong1 other) { + simd_ulong8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8_undef(simd_ulong1 other) { + simd_ulong8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8(simd_ulong2 other) { + simd_ulong8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8_undef(simd_ulong2 other) { + simd_ulong8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8(simd_ulong3 other) { + simd_ulong8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8_undef(simd_ulong3 other) { + simd_ulong8 result; + result.xyz = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit unsigned + * integers. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8(simd_ulong4 other) { + simd_ulong8 result = 0; + result.xyzw = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. 
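+ *
+ * A sketch (illustrative only):
+ *
+ *   simd_ulong4 v = simd_make_ulong4(1, 2, 3, 4);
+ *   simd_ulong8 w = simd_make_ulong8(v);  // {1, 2, 3, 4, 0, 0, 0, 0}
+ *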
*/ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8_undef(simd_ulong4 other) { + simd_ulong8 result; + result.xyzw = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_ulong8 simd_make_ulong8(simd_ulong8 other) { + return other; +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2(double x, double y) { + simd_double2 result; + result.x = x; + result.y = y; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of two 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2(double other) { + simd_double2 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of two 64-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2_undef(double other) { + simd_double2 result; + result.x = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2(simd_double2 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2(simd_double3 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2(simd_double4 other) { + return other.xy; +} + +/*! @abstract Truncates `other` to form a vector of two 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double2 simd_make_double2(simd_double8 other) { + return other.xy; +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(double x, double y, double z) { + simd_double3 result; + result.x = x; + result.y = y; + result.z = z; + return result; +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(double x, simd_double2 yz) { + simd_double3 result; + result.x = x; + result.yz = yz; + return result; +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(simd_double2 xy, double z) { + simd_double3 result; + result.xy = xy; + result.z = z; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(double other) { + simd_double3 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3_undef(double other) { + simd_double3 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of three 64-bit + * floating-point numbers. 
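+ *
+ * A sketch (illustrative only; `xy`, `v`, `z` are arbitrary names):
+ *
+ *   simd_double2 xy = simd_make_double2(1.0, 2.0);
+ *   simd_double3 v  = simd_make_double3(xy, 3.0);  // {1, 2, 3}
+ *   simd_double3 z  = simd_make_double3(xy);       // {1, 2, 0}
+ *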
*/ +static inline SIMD_CFUNC simd_double3 simd_make_double3(simd_double2 other) { + simd_double3 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of three 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3_undef(simd_double2 other) { + simd_double3 result; + result.xy = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(simd_double3 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of three 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(simd_double4 other) { + return other.xyz; +} + +/*! @abstract Truncates `other` to form a vector of three 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double3 simd_make_double3(simd_double8 other) { + return other.xyz; +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 64-bit floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(double x, double y, double z, double w) { + simd_double4 result; + result.x = x; + result.y = y; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(double x, double y, simd_double2 zw) { + simd_double4 result; + result.x = x; + result.y = y; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(double x, simd_double2 yz, double w) { + simd_double4 result; + result.x = x; + result.yz = yz; + result.w = w; + return result; +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double2 xy, double z, double w) { + simd_double4 result; + result.xy = xy; + result.z = z; + result.w = w; + return result; +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(double x, simd_double3 yzw) { + simd_double4 result; + result.x = x; + result.yzw = yzw; + return result; +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double2 xy, simd_double2 zw) { + simd_double4 result; + result.xy = xy; + result.zw = zw; + return result; +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double3 xyz, double w) { + simd_double4 result; + result.xyz = xyz; + result.w = w; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(double other) { + simd_double4 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. 
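+ *
+ * A sketch (illustrative only):
+ *
+ *   simd_double4 a = simd_make_double4(1.0);        // {1, 0, 0, 0}
+ *   simd_double4 b = simd_make_double4_undef(1.0);  // lanes 1-3 unspecified
+ *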
*/ +static inline SIMD_CFUNC simd_double4 simd_make_double4_undef(double other) { + simd_double4 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double2 other) { + simd_double4 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4_undef(simd_double2 other) { + simd_double4 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of four 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double3 other) { + simd_double4 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of four 64-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4_undef(simd_double3 other) { + simd_double4 result; + result.xyz = other; + return result; +} + +/*! @abstract Returns `other` unmodified. This function is a convenience for + * templated and autogenerated code. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double4 other) { + return other; +} + +/*! @abstract Truncates `other` to form a vector of four 64-bit floating- + * point numbers. */ +static inline SIMD_CFUNC simd_double4 simd_make_double4(simd_double8 other) { + return other.xyzw; +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double8 simd_make_double8(simd_double4 lo, simd_double4 hi) { + simd_double8 result; + result.lo = lo; + result.hi = hi; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double8 simd_make_double8(double other) { + simd_double8 result = 0; + result.x = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_double8 simd_make_double8_undef(double other) { + simd_double8 result; + result.x = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double8 simd_make_double8(simd_double2 other) { + simd_double8 result = 0; + result.xy = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +static inline SIMD_CFUNC simd_double8 simd_make_double8_undef(simd_double2 other) { + simd_double8 result; + result.xy = other; + return result; +} + +/*! @abstract Zero-extends `other` to form a vector of eight 64-bit + * floating-point numbers. */ +static inline SIMD_CFUNC simd_double8 simd_make_double8(simd_double3 other) { + simd_double8 result = 0; + result.xyz = other; + return result; +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. 
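+ *
+ * A sketch (illustrative only):
+ *
+ *   simd_double4 v = simd_make_double4(1.0, 2.0, 3.0, 4.0);
+ *   simd_double8 w = simd_make_double8(v);  // {1, 2, 3, 4, 0, 0, 0, 0}
+ *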
 */
+static inline SIMD_CFUNC simd_double8 simd_make_double8_undef(simd_double3 other) {
+  simd_double8 result;
+  result.xyz = other;
+  return result;
+}
+
+/*! @abstract Zero-extends `other` to form a vector of eight 64-bit
+ * floating-point numbers. */
+static inline SIMD_CFUNC simd_double8 simd_make_double8(simd_double4 other) {
+  simd_double8 result = 0;
+  result.xyzw = other;
+  return result;
+}
+
+/*! @abstract Extends `other` to form a vector of eight 64-bit floating-
+ * point numbers. The contents of the newly-created vector lanes are
+ * unspecified. */
+static inline SIMD_CFUNC simd_double8 simd_make_double8_undef(simd_double4 other) {
+  simd_double8 result;
+  result.xyzw = other;
+  return result;
+}
+
+/*! @abstract Returns `other` unmodified. This function is a convenience for
+ * templated and autogenerated code. */
+static inline SIMD_CFUNC simd_double8 simd_make_double8(simd_double8 other) {
+  return other;
+}
+
+#ifdef __cplusplus
+} /* extern "C" */
+
+#include
+#include
+
+namespace simd {
+/*! @abstract Concatenates `x` and `y` to form a vector of two 8-bit signed
+ * (twos-complement) integers. */
+static inline SIMD_CPPFUNC char2 make_char2(char x, char y) {
+  return ::simd_make_char2(x, y);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of two
+ * 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char2 make_char2(typeN other) {
+  return ::simd_make_char2(other);
+}
+
+/*! @abstract Extends `other` to form a vector of two 8-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char2 make_char2_undef(typeN other) {
+  return ::simd_make_char2_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char3 make_char3(char x, char y, char z) {
+  return ::simd_make_char3(x, y, z);
+}
+
+/*! @abstract Concatenates `x` and `yz` to form a vector of three 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char3 make_char3(char x, char2 yz) {
+  return ::simd_make_char3(x, yz);
+}
+
+/*! @abstract Concatenates `xy` and `z` to form a vector of three 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char3 make_char3(char2 xy, char z) {
+  return ::simd_make_char3(xy, z);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of three
+ * 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char3 make_char3(typeN other) {
+  return ::simd_make_char3(other);
+}
+
+/*! @abstract Extends `other` to form a vector of three 8-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char3 make_char3_undef(typeN other) {
+  return ::simd_make_char3_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four
+ * 8-bit signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char x, char y, char z, char w) {
+  return ::simd_make_char4(x, y, z, w);
+}
+
+/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char x, char y, char2 zw) {
+  return ::simd_make_char4(x, y, zw);
+}
+
+/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char x, char2 yz, char w) {
+  return ::simd_make_char4(x, yz, w);
+}
+
+/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char2 xy, char z, char w) {
+  return ::simd_make_char4(xy, z, w);
+}
+
+/*! @abstract Concatenates `x` and `yzw` to form a vector of four 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char x, char3 yzw) {
+  return ::simd_make_char4(x, yzw);
+}
+
+/*! @abstract Concatenates `xy` and `zw` to form a vector of four 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char2 xy, char2 zw) {
+  return ::simd_make_char4(xy, zw);
+}
+
+/*! @abstract Concatenates `xyz` and `w` to form a vector of four 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char4 make_char4(char3 xyz, char w) {
+  return ::simd_make_char4(xyz, w);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of four
+ * 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char4 make_char4(typeN other) {
+  return ::simd_make_char4(other);
+}
+
+/*! @abstract Extends `other` to form a vector of four 8-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char4 make_char4_undef(typeN other) {
+  return ::simd_make_char4_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char8 make_char8(char4 lo, char4 hi) {
+  return ::simd_make_char8(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of eight
+ * 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char8 make_char8(typeN other) {
+  return ::simd_make_char8(other);
+}
+
+/*! @abstract Extends `other` to form a vector of eight 8-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char8 make_char8_undef(typeN other) {
+  return ::simd_make_char8_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 8-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char16 make_char16(char8 lo, char8 hi) {
+  return ::simd_make_char16(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen
+ * 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char16 make_char16(typeN other) {
+  return ::simd_make_char16(other);
+}
+
+/*! @abstract Extends `other` to form a vector of sixteen 8-bit signed
+ * (twos-complement) integers. The contents of the newly-created vector
+ * lanes are unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char16 make_char16_undef(typeN other) {
+  return ::simd_make_char16_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two
+ * 8-bit signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char32 make_char32(char16 lo, char16 hi) {
+  return ::simd_make_char32(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of thirty-
+ * two 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char32 make_char32(typeN other) {
+  return ::simd_make_char32(other);
+}
+
+/*! @abstract Extends `other` to form a vector of thirty-two 8-bit signed
+ * (twos-complement) integers. The contents of the newly-created vector
+ * lanes are unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char32 make_char32_undef(typeN other) {
+  return ::simd_make_char32_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of sixty-four
+ * 8-bit signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC char64 make_char64(char32 lo, char32 hi) {
+  return ::simd_make_char64(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of sixty-
+ * four 8-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC char64 make_char64(typeN other) {
+  return ::simd_make_char64(other);
+}
+
+/*! @abstract Extends `other` to form a vector of sixty-four 8-bit signed
+ * (twos-complement) integers. The contents of the newly-created vector
+ * lanes are unspecified. */
+template <typename typeN> static SIMD_CPPFUNC char64 make_char64_undef(typeN other) {
+  return ::simd_make_char64_undef(other);
+}
+
+/*! @abstract Concatenates `x` and `y` to form a vector of two 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar2 make_uchar2(unsigned char x, unsigned char y) {
+  return ::simd_make_uchar2(x, y);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of two
+ * 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar2 make_uchar2(typeN other) {
+  return ::simd_make_uchar2(other);
+}
+
+/*! @abstract Extends `other` to form a vector of two 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar2 make_uchar2_undef(typeN other) {
+  return ::simd_make_uchar2_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar3 make_uchar3(unsigned char x, unsigned char y, unsigned char z) {
+  return ::simd_make_uchar3(x, y, z);
+}
+
+/*! @abstract Concatenates `x` and `yz` to form a vector of three 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar3 make_uchar3(unsigned char x, uchar2 yz) {
+  return ::simd_make_uchar3(x, yz);
+}
+
+/*! @abstract Concatenates `xy` and `z` to form a vector of three 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar3 make_uchar3(uchar2 xy, unsigned char z) {
+  return ::simd_make_uchar3(xy, z);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of three
+ * 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar3 make_uchar3(typeN other) {
+  return ::simd_make_uchar3(other);
+}
+
+/*! @abstract Extends `other` to form a vector of three 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar3 make_uchar3_undef(typeN other) {
+  return ::simd_make_uchar3_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four
+ * 8-bit unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(unsigned char x, unsigned char y, unsigned char z, unsigned char w) {
+  return ::simd_make_uchar4(x, y, z, w);
+}
+
+/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(unsigned char x, unsigned char y, uchar2 zw) {
+  return ::simd_make_uchar4(x, y, zw);
+}
+
+/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(unsigned char x, uchar2 yz, unsigned char w) {
+  return ::simd_make_uchar4(x, yz, w);
+}
+
+/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(uchar2 xy, unsigned char z, unsigned char w) {
+  return ::simd_make_uchar4(xy, z, w);
+}
+
+/*! @abstract Concatenates `x` and `yzw` to form a vector of four 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(unsigned char x, uchar3 yzw) {
+  return ::simd_make_uchar4(x, yzw);
+}
+
+/*! @abstract Concatenates `xy` and `zw` to form a vector of four 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(uchar2 xy, uchar2 zw) {
+  return ::simd_make_uchar4(xy, zw);
+}
+
+/*! @abstract Concatenates `xyz` and `w` to form a vector of four 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar4 make_uchar4(uchar3 xyz, unsigned char w) {
+  return ::simd_make_uchar4(xyz, w);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of four
+ * 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar4 make_uchar4(typeN other) {
+  return ::simd_make_uchar4(other);
+}
+
+/*! @abstract Extends `other` to form a vector of four 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar4 make_uchar4_undef(typeN other) {
+  return ::simd_make_uchar4_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar8 make_uchar8(uchar4 lo, uchar4 hi) {
+  return ::simd_make_uchar8(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of eight
+ * 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar8 make_uchar8(typeN other) {
+  return ::simd_make_uchar8(other);
+}
+
+/*! @abstract Extends `other` to form a vector of eight 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar8 make_uchar8_undef(typeN other) {
+  return ::simd_make_uchar8_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 8-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC uchar16 make_uchar16(uchar8 lo, uchar8 hi) {
+  return ::simd_make_uchar16(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen
+ * 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar16 make_uchar16(typeN other) {
+  return ::simd_make_uchar16(other);
+}
+
+/*! @abstract Extends `other` to form a vector of sixteen 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar16 make_uchar16_undef(typeN other) {
+  return ::simd_make_uchar16_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two
+ * 8-bit unsigned integers. */
+static inline SIMD_CPPFUNC uchar32 make_uchar32(uchar16 lo, uchar16 hi) {
+  return ::simd_make_uchar32(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of thirty-
+ * two 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar32 make_uchar32(typeN other) {
+  return ::simd_make_uchar32(other);
+}
+
+/*! @abstract Extends `other` to form a vector of thirty-two 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar32 make_uchar32_undef(typeN other) {
+  return ::simd_make_uchar32_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of sixty-four
+ * 8-bit unsigned integers. */
+static inline SIMD_CPPFUNC uchar64 make_uchar64(uchar32 lo, uchar32 hi) {
+  return ::simd_make_uchar64(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of sixty-
+ * four 8-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC uchar64 make_uchar64(typeN other) {
+  return ::simd_make_uchar64(other);
+}
+
+/*! @abstract Extends `other` to form a vector of sixty-four 8-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC uchar64 make_uchar64_undef(typeN other) {
+  return ::simd_make_uchar64_undef(other);
+}
+
+/*! @abstract Concatenates `x` and `y` to form a vector of two 16-bit signed
+ * (twos-complement) integers. */
+static inline SIMD_CPPFUNC short2 make_short2(short x, short y) {
+  return ::simd_make_short2(x, y);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of two
+ * 16-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC short2 make_short2(typeN other) {
+  return ::simd_make_short2(other);
+}
+
+/*! @abstract Extends `other` to form a vector of two 16-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC short2 make_short2_undef(typeN other) {
+  return ::simd_make_short2_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short3 make_short3(short x, short y, short z) {
+  return ::simd_make_short3(x, y, z);
+}
+
+/*! @abstract Concatenates `x` and `yz` to form a vector of three 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short3 make_short3(short x, short2 yz) {
+  return ::simd_make_short3(x, yz);
+}
+
+/*! @abstract Concatenates `xy` and `z` to form a vector of three 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short3 make_short3(short2 xy, short z) {
+  return ::simd_make_short3(xy, z);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of three
+ * 16-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC short3 make_short3(typeN other) {
+  return ::simd_make_short3(other);
+}
+
+/*! @abstract Extends `other` to form a vector of three 16-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC short3 make_short3_undef(typeN other) {
+  return ::simd_make_short3_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four
+ * 16-bit signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short x, short y, short z, short w) {
+  return ::simd_make_short4(x, y, z, w);
+}
+
+/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short x, short y, short2 zw) {
+  return ::simd_make_short4(x, y, zw);
+}
+
+/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short x, short2 yz, short w) {
+  return ::simd_make_short4(x, yz, w);
+}
+
+/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short2 xy, short z, short w) {
+  return ::simd_make_short4(xy, z, w);
+}
+
+/*! @abstract Concatenates `x` and `yzw` to form a vector of four 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short x, short3 yzw) {
+  return ::simd_make_short4(x, yzw);
+}
+
+/*! @abstract Concatenates `xy` and `zw` to form a vector of four 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short2 xy, short2 zw) {
+  return ::simd_make_short4(xy, zw);
+}
+
+/*! @abstract Concatenates `xyz` and `w` to form a vector of four 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short4 make_short4(short3 xyz, short w) {
+  return ::simd_make_short4(xyz, w);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of four
+ * 16-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC short4 make_short4(typeN other) {
+  return ::simd_make_short4(other);
+}
+
+/*! @abstract Extends `other` to form a vector of four 16-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC short4 make_short4_undef(typeN other) {
+  return ::simd_make_short4_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short8 make_short8(short4 lo, short4 hi) {
+  return ::simd_make_short8(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of eight
+ * 16-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC short8 make_short8(typeN other) {
+  return ::simd_make_short8(other);
+}
+
+/*! @abstract Extends `other` to form a vector of eight 16-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC short8 make_short8_undef(typeN other) {
+  return ::simd_make_short8_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 16-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short16 make_short16(short8 lo, short8 hi) {
+  return ::simd_make_short16(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen
+ * 16-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC short16 make_short16(typeN other) {
+  return ::simd_make_short16(other);
+}
+
+/*! @abstract Extends `other` to form a vector of sixteen 16-bit signed
+ * (twos-complement) integers. The contents of the newly-created vector
+ * lanes are unspecified. */
+template <typename typeN> static SIMD_CPPFUNC short16 make_short16_undef(typeN other) {
+  return ::simd_make_short16_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two
+ * 16-bit signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC short32 make_short32(short16 lo, short16 hi) {
+  return ::simd_make_short32(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of thirty-
+ * two 16-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC short32 make_short32(typeN other) {
+  return ::simd_make_short32(other);
+}
+
+/*! @abstract Extends `other` to form a vector of thirty-two 16-bit signed
+ * (twos-complement) integers. The contents of the newly-created vector
+ * lanes are unspecified. */
+template <typename typeN> static SIMD_CPPFUNC short32 make_short32_undef(typeN other) {
+  return ::simd_make_short32_undef(other);
+}
+
+/*! @abstract Concatenates `x` and `y` to form a vector of two 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort2 make_ushort2(unsigned short x, unsigned short y) {
+  return ::simd_make_ushort2(x, y);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of two
+ * 16-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC ushort2 make_ushort2(typeN other) {
+  return ::simd_make_ushort2(other);
+}
+
+/*! @abstract Extends `other` to form a vector of two 16-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC ushort2 make_ushort2_undef(typeN other) {
+  return ::simd_make_ushort2_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort3 make_ushort3(unsigned short x, unsigned short y, unsigned short z) {
+  return ::simd_make_ushort3(x, y, z);
+}
+
+/*! @abstract Concatenates `x` and `yz` to form a vector of three 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort3 make_ushort3(unsigned short x, ushort2 yz) {
+  return ::simd_make_ushort3(x, yz);
+}
+
+/*! @abstract Concatenates `xy` and `z` to form a vector of three 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort3 make_ushort3(ushort2 xy, unsigned short z) {
+  return ::simd_make_ushort3(xy, z);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of three
+ * 16-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC ushort3 make_ushort3(typeN other) {
+  return ::simd_make_ushort3(other);
+}
+
+/*! @abstract Extends `other` to form a vector of three 16-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC ushort3 make_ushort3_undef(typeN other) {
+  return ::simd_make_ushort3_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four
+ * 16-bit unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(unsigned short x, unsigned short y, unsigned short z, unsigned short w) {
+  return ::simd_make_ushort4(x, y, z, w);
+}
+
+/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(unsigned short x, unsigned short y, ushort2 zw) {
+  return ::simd_make_ushort4(x, y, zw);
+}
+
+/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(unsigned short x, ushort2 yz, unsigned short w) {
+  return ::simd_make_ushort4(x, yz, w);
+}
+
+/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(ushort2 xy, unsigned short z, unsigned short w) {
+  return ::simd_make_ushort4(xy, z, w);
+}
+
+/*! @abstract Concatenates `x` and `yzw` to form a vector of four 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(unsigned short x, ushort3 yzw) {
+  return ::simd_make_ushort4(x, yzw);
+}
+
+/*! @abstract Concatenates `xy` and `zw` to form a vector of four 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(ushort2 xy, ushort2 zw) {
+  return ::simd_make_ushort4(xy, zw);
+}
+
+/*! @abstract Concatenates `xyz` and `w` to form a vector of four 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort4 make_ushort4(ushort3 xyz, unsigned short w) {
+  return ::simd_make_ushort4(xyz, w);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of four
+ * 16-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC ushort4 make_ushort4(typeN other) {
+  return ::simd_make_ushort4(other);
+}
+
+/*! @abstract Extends `other` to form a vector of four 16-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC ushort4 make_ushort4_undef(typeN other) {
+  return ::simd_make_ushort4_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort8 make_ushort8(ushort4 lo, ushort4 hi) {
+  return ::simd_make_ushort8(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of eight
+ * 16-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC ushort8 make_ushort8(typeN other) {
+  return ::simd_make_ushort8(other);
+}
+
+/*! @abstract Extends `other` to form a vector of eight 16-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC ushort8 make_ushort8_undef(typeN other) {
+  return ::simd_make_ushort8_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 16-bit
+ * unsigned integers. */
+static inline SIMD_CPPFUNC ushort16 make_ushort16(ushort8 lo, ushort8 hi) {
+  return ::simd_make_ushort16(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen
+ * 16-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC ushort16 make_ushort16(typeN other) {
+  return ::simd_make_ushort16(other);
+}
+
+/*! @abstract Extends `other` to form a vector of sixteen 16-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC ushort16 make_ushort16_undef(typeN other) {
+  return ::simd_make_ushort16_undef(other);
+}
+
+/*! @abstract Concatenates `lo` and `hi` to form a vector of thirty-two
+ * 16-bit unsigned integers. */
+static inline SIMD_CPPFUNC ushort32 make_ushort32(ushort16 lo, ushort16 hi) {
+  return ::simd_make_ushort32(lo, hi);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of thirty-
+ * two 16-bit unsigned integers. */
+template <typename typeN> static SIMD_CPPFUNC ushort32 make_ushort32(typeN other) {
+  return ::simd_make_ushort32(other);
+}
+
+/*! @abstract Extends `other` to form a vector of thirty-two 16-bit unsigned
+ * integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC ushort32 make_ushort32_undef(typeN other) {
+  return ::simd_make_ushort32_undef(other);
+}
+
+/*! @abstract Concatenates `x` and `y` to form a vector of two 32-bit signed
+ * (twos-complement) integers. */
+static inline SIMD_CPPFUNC int2 make_int2(int x, int y) {
+  return ::simd_make_int2(x, y);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of two
+ * 32-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC int2 make_int2(typeN other) {
+  return ::simd_make_int2(other);
+}
+
+/*! @abstract Extends `other` to form a vector of two 32-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC int2 make_int2_undef(typeN other) {
+  return ::simd_make_int2_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int3 make_int3(int x, int y, int z) {
+  return ::simd_make_int3(x, y, z);
+}
+
+/*! @abstract Concatenates `x` and `yz` to form a vector of three 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int3 make_int3(int x, int2 yz) {
+  return ::simd_make_int3(x, yz);
+}
+
+/*! @abstract Concatenates `xy` and `z` to form a vector of three 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int3 make_int3(int2 xy, int z) {
+  return ::simd_make_int3(xy, z);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of three
+ * 32-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC int3 make_int3(typeN other) {
+  return ::simd_make_int3(other);
+}
+
+/*! @abstract Extends `other` to form a vector of three 32-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC int3 make_int3_undef(typeN other) {
+  return ::simd_make_int3_undef(other);
+}
+
+/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four
+ * 32-bit signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int x, int y, int z, int w) {
+  return ::simd_make_int4(x, y, z, w);
+}
+
+/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int x, int y, int2 zw) {
+  return ::simd_make_int4(x, y, zw);
+}
+
+/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int x, int2 yz, int w) {
+  return ::simd_make_int4(x, yz, w);
+}
+
+/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int2 xy, int z, int w) {
+  return ::simd_make_int4(xy, z, w);
+}
+
+/*! @abstract Concatenates `x` and `yzw` to form a vector of four 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int x, int3 yzw) {
+  return ::simd_make_int4(x, yzw);
+}
+
+/*! @abstract Concatenates `xy` and `zw` to form a vector of four 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int2 xy, int2 zw) {
+  return ::simd_make_int4(xy, zw);
+}
+
+/*! @abstract Concatenates `xyz` and `w` to form a vector of four 32-bit
+ * signed (twos-complement) integers. */
+static inline SIMD_CPPFUNC int4 make_int4(int3 xyz, int w) {
+  return ::simd_make_int4(xyz, w);
+}
+
+/*! @abstract Truncates or zero-extends `other` to form a vector of four
+ * 32-bit signed (twos-complement) integers. */
+template <typename typeN> static SIMD_CPPFUNC int4 make_int4(typeN other) {
+  return ::simd_make_int4(other);
+}
+
+/*! @abstract Extends `other` to form a vector of four 32-bit signed (twos-
+ * complement) integers. The contents of the newly-created vector lanes are
+ * unspecified. */
+template <typename typeN> static SIMD_CPPFUNC int4 make_int4_undef(typeN other) {
+  return ::simd_make_int4_undef(other);
+}
+
+/*!
@abstract Concatenates `lo` and `hi` to form a vector of eight 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC int8 make_int8(int4 lo, int4 hi) { + return ::simd_make_int8(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of eight + * 32-bit signed (twos-complement) integers. */ +template static SIMD_CPPFUNC int8 make_int8(typeN other) { + return ::simd_make_int8(other); +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC int8 make_int8_undef(typeN other) { + return ::simd_make_int8_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 32-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC int16 make_int16(int8 lo, int8 hi) { + return ::simd_make_int16(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen + * 32-bit signed (twos-complement) integers. */ +template static SIMD_CPPFUNC int16 make_int16(typeN other) { + return ::simd_make_int16(other); +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit signed + * (twos-complement) integers. The contents of the newly-created vector + * lanes are unspecified. */ +template static SIMD_CPPFUNC int16 make_int16_undef(typeN other) { + return ::simd_make_int16_undef(other); +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint2 make_uint2(unsigned int x, unsigned int y) { + return ::simd_make_uint2(x, y); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of two + * 32-bit unsigned integers. */ +template static SIMD_CPPFUNC uint2 make_uint2(typeN other) { + return ::simd_make_uint2(other); +} + +/*! @abstract Extends `other` to form a vector of two 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC uint2 make_uint2_undef(typeN other) { + return ::simd_make_uint2_undef(other); +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint3 make_uint3(unsigned int x, unsigned int y, unsigned int z) { + return ::simd_make_uint3(x, y, z); +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint3 make_uint3(unsigned int x, uint2 yz) { + return ::simd_make_uint3(x, yz); +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint3 make_uint3(uint2 xy, unsigned int z) { + return ::simd_make_uint3(xy, z); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of three + * 32-bit unsigned integers. */ +template static SIMD_CPPFUNC uint3 make_uint3(typeN other) { + return ::simd_make_uint3(other); +} + +/*! @abstract Extends `other` to form a vector of three 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC uint3 make_uint3_undef(typeN other) { + return ::simd_make_uint3_undef(other); +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 32-bit unsigned integers. 
*/ +static inline SIMD_CPPFUNC uint4 make_uint4(unsigned int x, unsigned int y, unsigned int z, unsigned int w) { + return ::simd_make_uint4(x, y, z, w); +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint4 make_uint4(unsigned int x, unsigned int y, uint2 zw) { + return ::simd_make_uint4(x, y, zw); +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint4 make_uint4(unsigned int x, uint2 yz, unsigned int w) { + return ::simd_make_uint4(x, yz, w); +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint4 make_uint4(uint2 xy, unsigned int z, unsigned int w) { + return ::simd_make_uint4(xy, z, w); +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint4 make_uint4(unsigned int x, uint3 yzw) { + return ::simd_make_uint4(x, yzw); +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint4 make_uint4(uint2 xy, uint2 zw) { + return ::simd_make_uint4(xy, zw); +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint4 make_uint4(uint3 xyz, unsigned int w) { + return ::simd_make_uint4(xyz, w); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of four + * 32-bit unsigned integers. */ +template static SIMD_CPPFUNC uint4 make_uint4(typeN other) { + return ::simd_make_uint4(other); +} + +/*! @abstract Extends `other` to form a vector of four 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC uint4 make_uint4_undef(typeN other) { + return ::simd_make_uint4_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint8 make_uint8(uint4 lo, uint4 hi) { + return ::simd_make_uint8(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of eight + * 32-bit unsigned integers. */ +template static SIMD_CPPFUNC uint8 make_uint8(typeN other) { + return ::simd_make_uint8(other); +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC uint8 make_uint8_undef(typeN other) { + return ::simd_make_uint8_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 32-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC uint16 make_uint16(uint8 lo, uint8 hi) { + return ::simd_make_uint16(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen + * 32-bit unsigned integers. */ +template static SIMD_CPPFUNC uint16 make_uint16(typeN other) { + return ::simd_make_uint16(other); +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC uint16 make_uint16_undef(typeN other) { + return ::simd_make_uint16_undef(other); +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 32-bit + * floating-point numbers. 
*/ +static inline SIMD_CPPFUNC float2 make_float2(float x, float y) { + return ::simd_make_float2(x, y); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of two + * 32-bit floating-point numbers. */ +template static SIMD_CPPFUNC float2 make_float2(typeN other) { + return ::simd_make_float2(other); +} + +/*! @abstract Extends `other` to form a vector of two 32-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +template static SIMD_CPPFUNC float2 make_float2_undef(typeN other) { + return ::simd_make_float2_undef(other); +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float3 make_float3(float x, float y, float z) { + return ::simd_make_float3(x, y, z); +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float3 make_float3(float x, float2 yz) { + return ::simd_make_float3(x, yz); +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float3 make_float3(float2 xy, float z) { + return ::simd_make_float3(xy, z); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of three + * 32-bit floating-point numbers. */ +template static SIMD_CPPFUNC float3 make_float3(typeN other) { + return ::simd_make_float3(other); +} + +/*! @abstract Extends `other` to form a vector of three 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC float3 make_float3_undef(typeN other) { + return ::simd_make_float3_undef(other); +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 32-bit floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float x, float y, float z, float w) { + return ::simd_make_float4(x, y, z, w); +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float x, float y, float2 zw) { + return ::simd_make_float4(x, y, zw); +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float x, float2 yz, float w) { + return ::simd_make_float4(x, yz, w); +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float2 xy, float z, float w) { + return ::simd_make_float4(xy, z, w); +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float x, float3 yzw) { + return ::simd_make_float4(x, yzw); +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float2 xy, float2 zw) { + return ::simd_make_float4(xy, zw); +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float4 make_float4(float3 xyz, float w) { + return ::simd_make_float4(xyz, w); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of four + * 32-bit floating-point numbers. 
*/ +template static SIMD_CPPFUNC float4 make_float4(typeN other) { + return ::simd_make_float4(other); +} + +/*! @abstract Extends `other` to form a vector of four 32-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +template static SIMD_CPPFUNC float4 make_float4_undef(typeN other) { + return ::simd_make_float4_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float8 make_float8(float4 lo, float4 hi) { + return ::simd_make_float8(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of eight + * 32-bit floating-point numbers. */ +template static SIMD_CPPFUNC float8 make_float8(typeN other) { + return ::simd_make_float8(other); +} + +/*! @abstract Extends `other` to form a vector of eight 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC float8 make_float8_undef(typeN other) { + return ::simd_make_float8_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of sixteen 32-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC float16 make_float16(float8 lo, float8 hi) { + return ::simd_make_float16(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of sixteen + * 32-bit floating-point numbers. */ +template static SIMD_CPPFUNC float16 make_float16(typeN other) { + return ::simd_make_float16(other); +} + +/*! @abstract Extends `other` to form a vector of sixteen 32-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC float16 make_float16_undef(typeN other) { + return ::simd_make_float16_undef(other); +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 64-bit signed + * (twos-complement) integers. */ +static inline SIMD_CPPFUNC long2 make_long2(long1 x, long1 y) { + return ::simd_make_long2(x, y); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of two + * 64-bit signed (twos-complement) integers. */ +template static SIMD_CPPFUNC long2 make_long2(typeN other) { + return ::simd_make_long2(other); +} + +/*! @abstract Extends `other` to form a vector of two 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC long2 make_long2_undef(typeN other) { + return ::simd_make_long2_undef(other); +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long3 make_long3(long1 x, long1 y, long1 z) { + return ::simd_make_long3(x, y, z); +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long3 make_long3(long1 x, long2 yz) { + return ::simd_make_long3(x, yz); +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long3 make_long3(long2 xy, long1 z) { + return ::simd_make_long3(xy, z); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of three + * 64-bit signed (twos-complement) integers. */ +template static SIMD_CPPFUNC long3 make_long3(typeN other) { + return ::simd_make_long3(other); +} + +/*! 
@abstract Extends `other` to form a vector of three 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC long3 make_long3_undef(typeN other) { + return ::simd_make_long3_undef(other); +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 64-bit signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long1 x, long1 y, long1 z, long1 w) { + return ::simd_make_long4(x, y, z, w); +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long1 x, long1 y, long2 zw) { + return ::simd_make_long4(x, y, zw); +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long1 x, long2 yz, long1 w) { + return ::simd_make_long4(x, yz, w); +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long2 xy, long1 z, long1 w) { + return ::simd_make_long4(xy, z, w); +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long1 x, long3 yzw) { + return ::simd_make_long4(x, yzw); +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long2 xy, long2 zw) { + return ::simd_make_long4(xy, zw); +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long4 make_long4(long3 xyz, long1 w) { + return ::simd_make_long4(xyz, w); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of four + * 64-bit signed (twos-complement) integers. */ +template static SIMD_CPPFUNC long4 make_long4(typeN other) { + return ::simd_make_long4(other); +} + +/*! @abstract Extends `other` to form a vector of four 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC long4 make_long4_undef(typeN other) { + return ::simd_make_long4_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 64-bit + * signed (twos-complement) integers. */ +static inline SIMD_CPPFUNC long8 make_long8(long4 lo, long4 hi) { + return ::simd_make_long8(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of eight + * 64-bit signed (twos-complement) integers. */ +template static SIMD_CPPFUNC long8 make_long8(typeN other) { + return ::simd_make_long8(other); +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit signed (twos- + * complement) integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC long8 make_long8_undef(typeN other) { + return ::simd_make_long8_undef(other); +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong2 make_ulong2(ulong1 x, ulong1 y) { + return ::simd_make_ulong2(x, y); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of two + * 64-bit unsigned integers. 
*/ +template static SIMD_CPPFUNC ulong2 make_ulong2(typeN other) { + return ::simd_make_ulong2(other); +} + +/*! @abstract Extends `other` to form a vector of two 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC ulong2 make_ulong2_undef(typeN other) { + return ::simd_make_ulong2_undef(other); +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong3 make_ulong3(ulong1 x, ulong1 y, ulong1 z) { + return ::simd_make_ulong3(x, y, z); +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong3 make_ulong3(ulong1 x, ulong2 yz) { + return ::simd_make_ulong3(x, yz); +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong3 make_ulong3(ulong2 xy, ulong1 z) { + return ::simd_make_ulong3(xy, z); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of three + * 64-bit unsigned integers. */ +template static SIMD_CPPFUNC ulong3 make_ulong3(typeN other) { + return ::simd_make_ulong3(other); +} + +/*! @abstract Extends `other` to form a vector of three 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC ulong3 make_ulong3_undef(typeN other) { + return ::simd_make_ulong3_undef(other); +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 64-bit unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong1 x, ulong1 y, ulong1 z, ulong1 w) { + return ::simd_make_ulong4(x, y, z, w); +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong1 x, ulong1 y, ulong2 zw) { + return ::simd_make_ulong4(x, y, zw); +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong1 x, ulong2 yz, ulong1 w) { + return ::simd_make_ulong4(x, yz, w); +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong2 xy, ulong1 z, ulong1 w) { + return ::simd_make_ulong4(xy, z, w); +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong1 x, ulong3 yzw) { + return ::simd_make_ulong4(x, yzw); +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong2 xy, ulong2 zw) { + return ::simd_make_ulong4(xy, zw); +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong4 make_ulong4(ulong3 xyz, ulong1 w) { + return ::simd_make_ulong4(xyz, w); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of four + * 64-bit unsigned integers. */ +template static SIMD_CPPFUNC ulong4 make_ulong4(typeN other) { + return ::simd_make_ulong4(other); +} + +/*! @abstract Extends `other` to form a vector of four 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. 
*/ +template static SIMD_CPPFUNC ulong4 make_ulong4_undef(typeN other) { + return ::simd_make_ulong4_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 64-bit + * unsigned integers. */ +static inline SIMD_CPPFUNC ulong8 make_ulong8(ulong4 lo, ulong4 hi) { + return ::simd_make_ulong8(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of eight + * 64-bit unsigned integers. */ +template static SIMD_CPPFUNC ulong8 make_ulong8(typeN other) { + return ::simd_make_ulong8(other); +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit unsigned + * integers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC ulong8 make_ulong8_undef(typeN other) { + return ::simd_make_ulong8_undef(other); +} + +/*! @abstract Concatenates `x` and `y` to form a vector of two 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double2 make_double2(double x, double y) { + return ::simd_make_double2(x, y); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of two + * 64-bit floating-point numbers. */ +template static SIMD_CPPFUNC double2 make_double2(typeN other) { + return ::simd_make_double2(other); +} + +/*! @abstract Extends `other` to form a vector of two 64-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +template static SIMD_CPPFUNC double2 make_double2_undef(typeN other) { + return ::simd_make_double2_undef(other); +} + +/*! @abstract Concatenates `x`, `y` and `z` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double3 make_double3(double x, double y, double z) { + return ::simd_make_double3(x, y, z); +} + +/*! @abstract Concatenates `x` and `yz` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double3 make_double3(double x, double2 yz) { + return ::simd_make_double3(x, yz); +} + +/*! @abstract Concatenates `xy` and `z` to form a vector of three 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double3 make_double3(double2 xy, double z) { + return ::simd_make_double3(xy, z); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of three + * 64-bit floating-point numbers. */ +template static SIMD_CPPFUNC double3 make_double3(typeN other) { + return ::simd_make_double3(other); +} + +/*! @abstract Extends `other` to form a vector of three 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC double3 make_double3_undef(typeN other) { + return ::simd_make_double3_undef(other); +} + +/*! @abstract Concatenates `x`, `y`, `z` and `w` to form a vector of four + * 64-bit floating-point numbers. */ +static inline SIMD_CPPFUNC double4 make_double4(double x, double y, double z, double w) { + return ::simd_make_double4(x, y, z, w); +} + +/*! @abstract Concatenates `x`, `y` and `zw` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double4 make_double4(double x, double y, double2 zw) { + return ::simd_make_double4(x, y, zw); +} + +/*! @abstract Concatenates `x`, `yz` and `w` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double4 make_double4(double x, double2 yz, double w) { + return ::simd_make_double4(x, yz, w); +} + +/*! @abstract Concatenates `xy`, `z` and `w` to form a vector of four 64-bit + * floating-point numbers. 
*/ +static inline SIMD_CPPFUNC double4 make_double4(double2 xy, double z, double w) { + return ::simd_make_double4(xy, z, w); +} + +/*! @abstract Concatenates `x` and `yzw` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double4 make_double4(double x, double3 yzw) { + return ::simd_make_double4(x, yzw); +} + +/*! @abstract Concatenates `xy` and `zw` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double4 make_double4(double2 xy, double2 zw) { + return ::simd_make_double4(xy, zw); +} + +/*! @abstract Concatenates `xyz` and `w` to form a vector of four 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double4 make_double4(double3 xyz, double w) { + return ::simd_make_double4(xyz, w); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of four + * 64-bit floating-point numbers. */ +template static SIMD_CPPFUNC double4 make_double4(typeN other) { + return ::simd_make_double4(other); +} + +/*! @abstract Extends `other` to form a vector of four 64-bit floating-point + * numbers. The contents of the newly-created vector lanes are unspecified. */ +template static SIMD_CPPFUNC double4 make_double4_undef(typeN other) { + return ::simd_make_double4_undef(other); +} + +/*! @abstract Concatenates `lo` and `hi` to form a vector of eight 64-bit + * floating-point numbers. */ +static inline SIMD_CPPFUNC double8 make_double8(double4 lo, double4 hi) { + return ::simd_make_double8(lo, hi); +} + +/*! @abstract Truncates or zero-extends `other` to form a vector of eight + * 64-bit floating-point numbers. */ +template static SIMD_CPPFUNC double8 make_double8(typeN other) { + return ::simd_make_double8(other); +} + +/*! @abstract Extends `other` to form a vector of eight 64-bit floating- + * point numbers. The contents of the newly-created vector lanes are + * unspecified. */ +template static SIMD_CPPFUNC double8 make_double8_undef(typeN other) { + return ::simd_make_double8_undef(other); +} + +/*! @struct Vector + * @abstract Templated Vector struct based on scalar type and number of + * elements + * @field count Number of elements in the vector + * @field scalar_t The scalar type of each element + * @field type The inferred simd::typeN type + * @field packed_t The inferred simd::packed::typeN type + * @field mask_t The return type of comparison operations */ +template struct Vector { + // static const size_t count + // typedef scalar_t + // typedef type + // typedef packed_t + // typedef mask_t +}; +/*! @abstract Helper type to access the simd type easily. */ +template +using Vector_t = typename Vector::type; + +/*! @abstract Look up the equivalent Vector struct according to the simd + * type. */ +template struct get_traits +{ +// using type = Vector; +}; +/*! @abstract Helper type to access the Vector struct easily. + * @discussion This is commonly used to query the type traits of a simd + * type. + * For example, simd::traits::count is 4. 
*/ +template +using traits = typename get_traits::type; + +template<> struct Vector { + static const size_t count = 1; + typedef char1 scalar_t; + typedef char1 type; + typedef char1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef char1 scalar_t; + typedef char2 type; + typedef packed::char2 packed_t; + typedef char2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef char1 scalar_t; + typedef char3 type; + typedef char3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef char1 scalar_t; + typedef char4 type; + typedef packed::char4 packed_t; + typedef char4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef char1 scalar_t; + typedef char8 type; + typedef packed::char8 packed_t; + typedef char8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef char1 scalar_t; + typedef char16 type; + typedef packed::char16 packed_t; + typedef char16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 32; + typedef char1 scalar_t; + typedef char32 type; + typedef packed::char32 packed_t; + typedef char32 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 64; + typedef char1 scalar_t; + typedef char64 type; + typedef packed::char64 packed_t; + typedef char64 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef uchar1 scalar_t; + typedef uchar1 type; + typedef char1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef uchar1 scalar_t; + typedef uchar2 type; + typedef packed::uchar2 packed_t; + typedef char2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef uchar1 scalar_t; + typedef uchar3 type; + typedef char3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef uchar1 scalar_t; + typedef uchar4 type; + typedef packed::uchar4 packed_t; + typedef char4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef uchar1 scalar_t; + typedef uchar8 type; + typedef packed::uchar8 packed_t; + typedef char8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef uchar1 scalar_t; + typedef uchar16 type; + typedef packed::uchar16 packed_t; + typedef char16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 32; + typedef uchar1 scalar_t; + typedef uchar32 type; + typedef packed::uchar32 packed_t; + typedef char32 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t 
count = 64; + typedef uchar1 scalar_t; + typedef uchar64 type; + typedef packed::uchar64 packed_t; + typedef char64 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef short1 scalar_t; + typedef short1 type; + typedef short1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef short1 scalar_t; + typedef short2 type; + typedef packed::short2 packed_t; + typedef short2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef short1 scalar_t; + typedef short3 type; + typedef short3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef short1 scalar_t; + typedef short4 type; + typedef packed::short4 packed_t; + typedef short4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef short1 scalar_t; + typedef short8 type; + typedef packed::short8 packed_t; + typedef short8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef short1 scalar_t; + typedef short16 type; + typedef packed::short16 packed_t; + typedef short16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 32; + typedef short1 scalar_t; + typedef short32 type; + typedef packed::short32 packed_t; + typedef short32 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef ushort1 scalar_t; + typedef ushort1 type; + typedef short1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef ushort1 scalar_t; + typedef ushort2 type; + typedef packed::ushort2 packed_t; + typedef short2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef ushort1 scalar_t; + typedef ushort3 type; + typedef short3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef ushort1 scalar_t; + typedef ushort4 type; + typedef packed::ushort4 packed_t; + typedef short4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef ushort1 scalar_t; + typedef ushort8 type; + typedef packed::ushort8 packed_t; + typedef short8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef ushort1 scalar_t; + typedef ushort16 type; + typedef packed::ushort16 packed_t; + typedef short16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 32; + typedef ushort1 scalar_t; + typedef ushort32 type; + typedef packed::ushort32 packed_t; + typedef short32 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef int1 scalar_t; + typedef int1 type; 
+ typedef int1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef int1 scalar_t; + typedef int2 type; + typedef packed::int2 packed_t; + typedef int2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef int1 scalar_t; + typedef int3 type; + typedef int3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef int1 scalar_t; + typedef int4 type; + typedef packed::int4 packed_t; + typedef int4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef int1 scalar_t; + typedef int8 type; + typedef packed::int8 packed_t; + typedef int8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef int1 scalar_t; + typedef int16 type; + typedef packed::int16 packed_t; + typedef int16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef uint1 scalar_t; + typedef uint1 type; + typedef int1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef uint1 scalar_t; + typedef uint2 type; + typedef packed::uint2 packed_t; + typedef int2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef uint1 scalar_t; + typedef uint3 type; + typedef int3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef uint1 scalar_t; + typedef uint4 type; + typedef packed::uint4 packed_t; + typedef int4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef uint1 scalar_t; + typedef uint8 type; + typedef packed::uint8 packed_t; + typedef int8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef uint1 scalar_t; + typedef uint16 type; + typedef packed::uint16 packed_t; + typedef int16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef float1 scalar_t; + typedef float1 type; + typedef int1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef float1 scalar_t; + typedef float2 type; + typedef packed::float2 packed_t; + typedef int2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef float1 scalar_t; + typedef float3 type; + typedef int3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef float1 scalar_t; + typedef float4 type; + typedef packed::float4 packed_t; + typedef int4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef float1 scalar_t; + typedef 
float8 type; + typedef packed::float8 packed_t; + typedef int8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 16; + typedef float1 scalar_t; + typedef float16 type; + typedef packed::float16 packed_t; + typedef int16 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef long1 scalar_t; + typedef long1 type; + typedef long1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef long1 scalar_t; + typedef long2 type; + typedef packed::long2 packed_t; + typedef long2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef long1 scalar_t; + typedef long3 type; + typedef long3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef long1 scalar_t; + typedef long4 type; + typedef packed::long4 packed_t; + typedef long4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef long1 scalar_t; + typedef long8 type; + typedef packed::long8 packed_t; + typedef long8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef ulong1 scalar_t; + typedef ulong1 type; + typedef long1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef ulong1 scalar_t; + typedef ulong2 type; + typedef packed::ulong2 packed_t; + typedef long2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef ulong1 scalar_t; + typedef ulong3 type; + typedef long3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef ulong1 scalar_t; + typedef ulong4 type; + typedef packed::ulong4 packed_t; + typedef long4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 8; + typedef ulong1 scalar_t; + typedef ulong8 type; + typedef packed::ulong8 packed_t; + typedef long8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 1; + typedef double1 scalar_t; + typedef double1 type; + typedef long1 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 2; + typedef double1 scalar_t; + typedef double2 type; + typedef packed::double2 packed_t; + typedef long2 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 3; + typedef double1 scalar_t; + typedef double3 type; + typedef long3 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static const size_t count = 4; + typedef double1 scalar_t; + typedef double4 type; + typedef packed::double4 packed_t; + typedef long4 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +template<> struct Vector { + static 
const size_t count = 8; + typedef double1 scalar_t; + typedef double8 type; + typedef packed::double8 packed_t; + typedef long8 mask_t; +}; + +template <> struct get_traits +{ + using type = Vector; +}; + +#if __has_feature(cxx_constexpr) +/*! @abstract Templated make function based on return type and argument + * type. */ +template +static constexpr typeN make(Args... args) +{ + if constexpr (traits::count == 1) + { + using FirstArgType = typename std::tuple_element<0, std::tuple>::type; + if constexpr (std::is_same::scalar_t>::value) + return typeN(std::get<0>(std::make_tuple(args...))); + else + return typeN(std::get<0>(std::make_tuple(args...))[0]); + } + else if constexpr (std::is_same::value) + return make_char2(args...); + else if constexpr (std::is_same::value) + return make_char3(args...); + else if constexpr (std::is_same::value) + return make_char4(args...); + else if constexpr (std::is_same::value) + return make_char8(args...); + else if constexpr (std::is_same::value) + return make_char16(args...); + else if constexpr (std::is_same::value) + return make_char32(args...); + else if constexpr (std::is_same::value) + return make_char64(args...); + else if constexpr (std::is_same::value) + return make_uchar2(args...); + else if constexpr (std::is_same::value) + return make_uchar3(args...); + else if constexpr (std::is_same::value) + return make_uchar4(args...); + else if constexpr (std::is_same::value) + return make_uchar8(args...); + else if constexpr (std::is_same::value) + return make_uchar16(args...); + else if constexpr (std::is_same::value) + return make_uchar32(args...); + else if constexpr (std::is_same::value) + return make_uchar64(args...); + else if constexpr (std::is_same::value) + return make_short2(args...); + else if constexpr (std::is_same::value) + return make_short3(args...); + else if constexpr (std::is_same::value) + return make_short4(args...); + else if constexpr (std::is_same::value) + return make_short8(args...); + else if constexpr (std::is_same::value) + return make_short16(args...); + else if constexpr (std::is_same::value) + return make_short32(args...); + else if constexpr (std::is_same::value) + return make_ushort2(args...); + else if constexpr (std::is_same::value) + return make_ushort3(args...); + else if constexpr (std::is_same::value) + return make_ushort4(args...); + else if constexpr (std::is_same::value) + return make_ushort8(args...); + else if constexpr (std::is_same::value) + return make_ushort16(args...); + else if constexpr (std::is_same::value) + return make_ushort32(args...); + else if constexpr (std::is_same::value) + return make_int2(args...); + else if constexpr (std::is_same::value) + return make_int3(args...); + else if constexpr (std::is_same::value) + return make_int4(args...); + else if constexpr (std::is_same::value) + return make_int8(args...); + else if constexpr (std::is_same::value) + return make_int16(args...); + else if constexpr (std::is_same::value) + return make_uint2(args...); + else if constexpr (std::is_same::value) + return make_uint3(args...); + else if constexpr (std::is_same::value) + return make_uint4(args...); + else if constexpr (std::is_same::value) + return make_uint8(args...); + else if constexpr (std::is_same::value) + return make_uint16(args...); + else if constexpr (std::is_same::value) + return make_float2(args...); + else if constexpr (std::is_same::value) + return make_float3(args...); + else if constexpr (std::is_same::value) + return make_float4(args...); + else if constexpr (std::is_same::value) + 
return make_float8(args...); + else if constexpr (std::is_same::value) + return make_float16(args...); + else if constexpr (std::is_same::value) + return make_long2(args...); + else if constexpr (std::is_same::value) + return make_long3(args...); + else if constexpr (std::is_same::value) + return make_long4(args...); + else if constexpr (std::is_same::value) + return make_long8(args...); + else if constexpr (std::is_same::value) + return make_ulong2(args...); + else if constexpr (std::is_same::value) + return make_ulong3(args...); + else if constexpr (std::is_same::value) + return make_ulong4(args...); + else if constexpr (std::is_same::value) + return make_ulong8(args...); + else if constexpr (std::is_same::value) + return make_double2(args...); + else if constexpr (std::is_same::value) + return make_double3(args...); + else if constexpr (std::is_same::value) + return make_double4(args...); + else if constexpr (std::is_same::value) + return make_double8(args...); +} + +/*! @abstract Templated make_undef function based on return type and + * argument type. */ +template +static constexpr typeN make_undef(Args... args) +{ + if constexpr (traits::count == 1) + { + using FirstArgType = typename std::tuple_element<0, std::tuple>::type; + if constexpr (std::is_same::scalar_t>::value) + return typeN(std::get<0>(std::make_tuple(args...))); + else + return typeN(std::get<0>(std::make_tuple(args...))[0]); + } + else if constexpr (std::is_same::value) + return make_char2_undef(args...); + else if constexpr (std::is_same::value) + return make_char3_undef(args...); + else if constexpr (std::is_same::value) + return make_char4_undef(args...); + else if constexpr (std::is_same::value) + return make_char8_undef(args...); + else if constexpr (std::is_same::value) + return make_char16_undef(args...); + else if constexpr (std::is_same::value) + return make_char32_undef(args...); + else if constexpr (std::is_same::value) + return make_char64_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar2_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar3_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar4_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar8_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar16_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar32_undef(args...); + else if constexpr (std::is_same::value) + return make_uchar64_undef(args...); + else if constexpr (std::is_same::value) + return make_short2_undef(args...); + else if constexpr (std::is_same::value) + return make_short3_undef(args...); + else if constexpr (std::is_same::value) + return make_short4_undef(args...); + else if constexpr (std::is_same::value) + return make_short8_undef(args...); + else if constexpr (std::is_same::value) + return make_short16_undef(args...); + else if constexpr (std::is_same::value) + return make_short32_undef(args...); + else if constexpr (std::is_same::value) + return make_ushort2_undef(args...); + else if constexpr (std::is_same::value) + return make_ushort3_undef(args...); + else if constexpr (std::is_same::value) + return make_ushort4_undef(args...); + else if constexpr (std::is_same::value) + return make_ushort8_undef(args...); + else if constexpr (std::is_same::value) + return make_ushort16_undef(args...); + else if constexpr (std::is_same::value) + return make_ushort32_undef(args...); + else if constexpr (std::is_same::value) + return 
make_int2_undef(args...); + else if constexpr (std::is_same::value) + return make_int3_undef(args...); + else if constexpr (std::is_same::value) + return make_int4_undef(args...); + else if constexpr (std::is_same::value) + return make_int8_undef(args...); + else if constexpr (std::is_same::value) + return make_int16_undef(args...); + else if constexpr (std::is_same::value) + return make_uint2_undef(args...); + else if constexpr (std::is_same::value) + return make_uint3_undef(args...); + else if constexpr (std::is_same::value) + return make_uint4_undef(args...); + else if constexpr (std::is_same::value) + return make_uint8_undef(args...); + else if constexpr (std::is_same::value) + return make_uint16_undef(args...); + else if constexpr (std::is_same::value) + return make_float2_undef(args...); + else if constexpr (std::is_same::value) + return make_float3_undef(args...); + else if constexpr (std::is_same::value) + return make_float4_undef(args...); + else if constexpr (std::is_same::value) + return make_float8_undef(args...); + else if constexpr (std::is_same::value) + return make_float16_undef(args...); + else if constexpr (std::is_same::value) + return make_long2_undef(args...); + else if constexpr (std::is_same::value) + return make_long3_undef(args...); + else if constexpr (std::is_same::value) + return make_long4_undef(args...); + else if constexpr (std::is_same::value) + return make_long8_undef(args...); + else if constexpr (std::is_same::value) + return make_ulong2_undef(args...); + else if constexpr (std::is_same::value) + return make_ulong3_undef(args...); + else if constexpr (std::is_same::value) + return make_ulong4_undef(args...); + else if constexpr (std::is_same::value) + return make_ulong8_undef(args...); + else if constexpr (std::is_same::value) + return make_double2_undef(args...); + else if constexpr (std::is_same::value) + return make_double3_undef(args...); + else if constexpr (std::is_same::value) + return make_double4_undef(args...); + else if constexpr (std::is_same::value) + return make_double8_undef(args...); +} +#endif /* __has_feature(cxx_constexpr) */ +} /* namespace simd */ +#endif /* __cplusplus */ +#endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */ +#endif /* SIMD_VECTOR_CONSTRUCTORS */ diff --git a/vfsoverlay/vector_types.h b/vfsoverlay/vector_types.h new file mode 100644 index 00000000..223d696e --- /dev/null +++ b/vfsoverlay/vector_types.h @@ -0,0 +1,1281 @@ +/*! @header + * This header defines fixed size vector types that are useful both for + * graphics and geometry, and for software vectorization without + * architecture-specific intrinsics. + * + * These types are based on a clang feature called "Extended vector types" + * or "OpenCL vector types" (despite the name, these types work just fine + * in C, Objective-C, and C++). There are a few tricks that make these + * types nicer to work with than traditional simd intrinsic types: + * + * - Basic arithmetic operators are overloaded to perform lanewise + * operations with these types, including both vector-vector and + * vector-scalar operations. + * + * - It is possible to access vector components both via array-style + * subscripting and by using the "." operator with component names + * "x", "y", "z", "w", and permutations thereof. + * + * - There are also some named subvectors: .lo and .hi are the first + * and second halves of a vector, and .even and .odd are the even- + * and odd-indexed elements of a vector. 
+ * + * - Clang provides some useful builtins that operate on these vector + * types: __builtin_shufflevector and __builtin_convertvector. + * + * - The <simd/simd.h> headers define a large assortment of vector and + * matrix operations that work on these types. + * + * - You can also use the simd types with the architecture-specific + * intrinsics defined in <immintrin.h> and <arm_neon.h>. + * + * The following vector types are defined by this header: + * + * simd_charN where N is 1, 2, 3, 4, 8, 16, 32, or 64. + * simd_ucharN where N is 1, 2, 3, 4, 8, 16, 32, or 64. + * simd_shortN where N is 1, 2, 3, 4, 8, 16, or 32. + * simd_ushortN where N is 1, 2, 3, 4, 8, 16, or 32. + * simd_intN where N is 1, 2, 3, 4, 8, or 16. + * simd_uintN where N is 1, 2, 3, 4, 8, or 16. + * simd_floatN where N is 1, 2, 3, 4, 8, or 16. + * simd_longN where N is 1, 2, 3, 4, or 8. + * simd_ulongN where N is 1, 2, 3, 4, or 8. + * simd_doubleN where N is 1, 2, 3, 4, or 8. + * + * These types generally have greater alignment than the underlying scalar + * type; they are aligned to either the size of the vector[1] or 16 bytes, + * whichever is smaller. + * + * [1] Note that sizeof a three-element vector is the same as sizeof the + * corresponding four-element vector, because three-element vectors have + * a hidden lane of padding. + * + * In earlier versions of the simd library, the alignment of vectors could + * be larger than 16B, up to the "architectural vector size" of 16, 32, or + * 64B, depending on what options were passed on the command line when + * compiling. This super-alignment does not interact well with malloc, and + * makes it difficult for libraries to provide a stable API, while conferring + * relatively little performance benefit, so it has been relaxed. + * + * For each simd_typeN type where N is not 1 or 3, there is also a + * corresponding simd_packed_typeN type that requires only the alignment + * matching that of the underlying scalar type. Use this if you need to + * work with pointers-to or arrays-of scalar values: + * + * void myFunction(float *pointerToFourFloats) { + * // This is a bug, because `pointerToFourFloats` does not satisfy + * // the alignment requirements of the `simd_float4` type; attempting + * // to dereference (load from) `vecptr` is likely to crash at runtime. + * simd_float4 *vecptr = (simd_float4 *)pointerToFourFloats; + * + * // Instead, convert to `simd_packed_float4`: + * simd_packed_float4 *vecptr = (simd_packed_float4 *)pointerToFourFloats; + * // The `simd_packed_float4` type has the same alignment requirements + * // as `float`, so this conversion is safe, and lets us load a vector. + * // Note that `simd_packed_float4` can be assigned to `simd_float4` + * // without any conversion; the types only behave differently as + * // pointers or arrays. + * simd_float4 vector = vecptr[0]; + * } + * + * All of the simd_-prefixed types are also available in the C++ simd:: + * namespace; simd_char4 can be used as simd::char4, for example. These types + * largely match the Metal shader language vector types, except that there + * are no vector types larger than 4 elements in Metal. + * + * @copyright 2014-2017 Apple, Inc. All rights reserved. + * @unsorted */ + +#ifndef SIMD_VECTOR_TYPES +#define SIMD_VECTOR_TYPES + +# include <simd/base.h> +# if SIMD_COMPILER_HAS_REQUIRED_FEATURES + +/* MARK: Basic vector types */ + +/*! @group C and Objective-C vector types + * @discussion These are the basic types that underpin the simd library. */ + +/*! @abstract A scalar 8-bit signed (twos-complement) integer.
*/ +typedef char simd_char1; + +/*! @abstract A vector of two 8-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::char2. The alignment of this type is greater than the alignment of + * char; if you need to operate on data buffers that may not be suitably + * aligned, you should access them using simd_packed_char2 instead. */ +typedef __attribute__((__ext_vector_type__(2))) char simd_char2; + +/*! @abstract A vector of three 8-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::char3. Note that vectors of this type are padded to have the same + * size and alignment as simd_char4. */ +typedef __attribute__((__ext_vector_type__(3))) char simd_char3; + +/*! @abstract A vector of four 8-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::char4. The alignment of this type is greater than the alignment of + * char; if you need to operate on data buffers that may not be suitably + * aligned, you should access them using simd_packed_char4 instead. */ +typedef __attribute__((__ext_vector_type__(4))) char simd_char4; + +/*! @abstract A vector of eight 8-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::char8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of char; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_char8 instead. */ +typedef __attribute__((__ext_vector_type__(8))) char simd_char8; + +/*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::char16. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of char; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_char16 instead. */ +typedef __attribute__((__ext_vector_type__(16))) char simd_char16; + +/*! @abstract A vector of thirty-two 8-bit signed (twos-complement) + * integers. + * @description In C++ this type is also available as simd::char32. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of char; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_char32 instead. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(16))) char simd_char32; + +/*! @abstract A vector of sixty-four 8-bit signed (twos-complement) + * integers. + * @description In C++ this type is also available as simd::char64. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of char; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_char64 instead. */ +typedef __attribute__((__ext_vector_type__(64),__aligned__(16))) char simd_char64; + +/*! @abstract A scalar 8-bit unsigned integer. */ +typedef unsigned char simd_uchar1; + +/*! @abstract A vector of two 8-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::uchar2. 
The alignment of this type is greater than the alignment + * of unsigned char; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_uchar2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) unsigned char simd_uchar2; + +/*! @abstract A vector of three 8-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::uchar3. Note that vectors of this type are padded to have the same + * size and alignment as simd_uchar4. */ +typedef __attribute__((__ext_vector_type__(3))) unsigned char simd_uchar3; + +/*! @abstract A vector of four 8-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::uchar4. The alignment of this type is greater than the alignment + * of unsigned char; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_uchar4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4))) unsigned char simd_uchar4; + +/*! @abstract A vector of eight 8-bit unsigned integers. + * @description In C++ this type is also available as simd::uchar8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_uchar8 instead. */ +typedef __attribute__((__ext_vector_type__(8))) unsigned char simd_uchar8; + +/*! @abstract A vector of sixteen 8-bit unsigned integers. + * @description In C++ this type is also available as simd::uchar16. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_uchar16 instead. */ +typedef __attribute__((__ext_vector_type__(16))) unsigned char simd_uchar16; + +/*! @abstract A vector of thirty-two 8-bit unsigned integers. + * @description In C++ this type is also available as simd::uchar32. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_uchar32 instead. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(16))) unsigned char simd_uchar32; + +/*! @abstract A vector of sixty-four 8-bit unsigned integers. + * @description In C++ this type is also available as simd::uchar64. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_uchar64 instead. */ +typedef __attribute__((__ext_vector_type__(64),__aligned__(16))) unsigned char simd_uchar64; + +/*! @abstract A scalar 16-bit signed (twos-complement) integer. */ +typedef short simd_short1; + +/*! @abstract A vector of two 16-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::short2. The alignment of this type is greater than the alignment + * of short; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_short2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) short simd_short2; + +/*! 
@abstract A vector of three 16-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::short3. Note that vectors of this type are padded to have the same + * size and alignment as simd_short4. */ +typedef __attribute__((__ext_vector_type__(3))) short simd_short3; + +/*! @abstract A vector of four 16-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::short4. The alignment of this type is greater than the alignment + * of short; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_short4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4))) short simd_short4; + +/*! @abstract A vector of eight 16-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::short8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of short; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_short8 instead. */ +typedef __attribute__((__ext_vector_type__(8))) short simd_short8; + +/*! @abstract A vector of sixteen 16-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::short16. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of short; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_short16 instead. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(16))) short simd_short16; + +/*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers. + * @description In C++ this type is also available as simd::short32. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of short; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_short32 instead. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(16))) short simd_short32; + +/*! @abstract A scalar 16-bit unsigned integer. */ +typedef unsigned short simd_ushort1; + +/*! @abstract A vector of two 16-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::ushort2. The alignment of this type is greater than the alignment + * of unsigned short; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd_packed_ushort2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) unsigned short simd_ushort2; + +/*! @abstract A vector of three 16-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::ushort3. Note that vectors of this type are padded to have the + * same size and alignment as simd_ushort4. */ +typedef __attribute__((__ext_vector_type__(3))) unsigned short simd_ushort3; + +/*! @abstract A vector of four 16-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::ushort4. The alignment of this type is greater than the alignment + * of unsigned short; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd_packed_ushort4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4))) unsigned short simd_ushort4; + +/*! 
@abstract A vector of eight 16-bit unsigned integers. + * @description In C++ this type is also available as simd::ushort8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned short; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_ushort8 instead. */ +typedef __attribute__((__ext_vector_type__(8))) unsigned short simd_ushort8; + +/*! @abstract A vector of sixteen 16-bit unsigned integers. + * @description In C++ this type is also available as simd::ushort16. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned short; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_ushort16 instead. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(16))) unsigned short simd_ushort16; + +/*! @abstract A vector of thirty-two 16-bit unsigned integers. + * @description In C++ this type is also available as simd::ushort32. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned short; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_ushort32 instead. */ +typedef __attribute__((__ext_vector_type__(32),__aligned__(16))) unsigned short simd_ushort32; + +/*! @abstract A scalar 32-bit signed (twos-complement) integer. */ +typedef int simd_int1; + +/*! @abstract A vector of two 32-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::int2. The alignment of this type is greater than the alignment of + * int; if you need to operate on data buffers that may not be suitably + * aligned, you should access them using simd_packed_int2 instead. */ +typedef __attribute__((__ext_vector_type__(2))) int simd_int2; + +/*! @abstract A vector of three 32-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::int3. Note that vectors of this type are padded to have the same + * size and alignment as simd_int4. */ +typedef __attribute__((__ext_vector_type__(3))) int simd_int3; + +/*! @abstract A vector of four 32-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::int4. The alignment of this type is greater than the alignment of + * int; if you need to operate on data buffers that may not be suitably + * aligned, you should access them using simd_packed_int4 instead. */ +typedef __attribute__((__ext_vector_type__(4))) int simd_int4; + +/*! @abstract A vector of eight 32-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::int8. This type + * is not available in Metal. The alignment of this type is greater than + * the alignment of int; if you need to operate on data buffers that may + * not be suitably aligned, you should access them using simd_packed_int8 + * instead. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(16))) int simd_int8; + +/*! @abstract A vector of sixteen 32-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::int16. This + * type is not available in Metal. 
The alignment of this type is greater + * than the alignment of int; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_int16 instead. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(16))) int simd_int16; + +/*! @abstract A scalar 32-bit unsigned integer. */ +typedef unsigned int simd_uint1; + +/*! @abstract A vector of two 32-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::uint2. The alignment of this type is greater than the alignment of + * unsigned int; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_uint2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) unsigned int simd_uint2; + +/*! @abstract A vector of three 32-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::uint3. Note that vectors of this type are padded to have the same + * size and alignment as simd_uint4. */ +typedef __attribute__((__ext_vector_type__(3))) unsigned int simd_uint3; + +/*! @abstract A vector of four 32-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::uint4. The alignment of this type is greater than the alignment of + * unsigned int; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_uint4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4))) unsigned int simd_uint4; + +/*! @abstract A vector of eight 32-bit unsigned integers. + * @description In C++ this type is also available as simd::uint8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned int; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_uint8 instead. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(16))) unsigned int simd_uint8; + +/*! @abstract A vector of sixteen 32-bit unsigned integers. + * @description In C++ this type is also available as simd::uint16. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of unsigned int; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_uint16 instead. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(16))) unsigned int simd_uint16; + +/*! @abstract A scalar 32-bit floating-point number. */ +typedef float simd_float1; + +/*! @abstract A vector of two 32-bit floating-point numbers. + * @description In C++ and Metal, this type is also available as + * simd::float2. The alignment of this type is greater than the alignment + * of float; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_float2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) float simd_float2; + +/*! @abstract A vector of three 32-bit floating-point numbers. + * @description In C++ and Metal, this type is also available as + * simd::float3. Note that vectors of this type are padded to have the same + * size and alignment as simd_float4. */ +typedef __attribute__((__ext_vector_type__(3))) float simd_float3; + +/*! @abstract A vector of four 32-bit floating-point numbers. + * @description In C++ and Metal, this type is also available as + * simd::float4. 
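+ * (Editor's aside, an illustrative sketch rather than original header + * text; the names a, b and y are arbitrary: + * + * simd_float4 a = { 1.0f, 2.0f, 3.0f, 4.0f }; + * simd_float4 b = a + 1.0f; // lanewise add: { 2, 3, 4, 5 } + * float y = b.y; // named component access, y == 3.0f + * )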
The alignment of this type is greater than the alignment + * of float; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_float4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4))) float simd_float4; + +/*! @abstract A vector of eight 32-bit floating-point numbers. + * @description In C++ this type is also available as simd::float8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of float; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_float8 instead. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(16))) float simd_float8; + +/*! @abstract A vector of sixteen 32-bit floating-point numbers. + * @description In C++ this type is also available as simd::float16. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of float; if you need to operate on data buffers that + * may not be suitably aligned, you should access them using + * simd_packed_float16 instead. */ +typedef __attribute__((__ext_vector_type__(16),__aligned__(16))) float simd_float16; + +/*! @abstract A scalar 64-bit signed (twos-complement) integer. */ +#if defined __LP64__ +typedef long simd_long1; +#else +typedef long long simd_long1; +#endif + +/*! @abstract A vector of two 64-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::long2. The alignment of this type is greater than the alignment of + * simd_long1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_long2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) simd_long1 simd_long2; + +/*! @abstract A vector of three 64-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::long3. Note that vectors of this type are padded to have the same + * size and alignment as simd_long4. */ +typedef __attribute__((__ext_vector_type__(3),__aligned__(16))) simd_long1 simd_long3; + +/*! @abstract A vector of four 64-bit signed (twos-complement) integers. + * @description In C++ and Metal, this type is also available as + * simd::long4. The alignment of this type is greater than the alignment of + * simd_long1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_long4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(16))) simd_long1 simd_long4; + +/*! @abstract A vector of eight 64-bit signed (twos-complement) integers. + * @description In C++ this type is also available as simd::long8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of simd_long1; if you need to operate on data buffers + * that may not be suitably aligned, you should access them using + * simd_packed_long8 instead. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(16))) simd_long1 simd_long8; + +/*! @abstract A scalar 64-bit unsigned integer. */ +#if defined __LP64__ +typedef unsigned long simd_ulong1; +#else +typedef unsigned long long simd_ulong1; +#endif + +/*! @abstract A vector of two 64-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::ulong2.
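+ * (Editor's aside, not original header text: the #if defined __LP64__ + * definitions above give simd_long1 and simd_ulong1 a 64-bit + * representation on every supported platform, which C++ code can check + * with, for example, + * + * static_assert(sizeof(simd_long1) == 8, "simd_long1 is 64-bit"); + * static_assert(sizeof(simd_ulong1) == 8, "simd_ulong1 is 64-bit"); + * )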
The alignment of this type is greater than the alignment + * of simd_ulong1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_ulong2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) simd_ulong1 simd_ulong2; + +/*! @abstract A vector of three 64-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::ulong3. Note that vectors of this type are padded to have the same + * size and alignment as simd_ulong4. */ +typedef __attribute__((__ext_vector_type__(3),__aligned__(16))) simd_ulong1 simd_ulong3; + +/*! @abstract A vector of four 64-bit unsigned integers. + * @description In C++ and Metal, this type is also available as + * simd::ulong4. The alignment of this type is greater than the alignment + * of simd_ulong1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_ulong4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(16))) simd_ulong1 simd_ulong4; + +/*! @abstract A vector of eight 64-bit unsigned integers. + * @description In C++ this type is also available as simd::ulong8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of simd_ulong1; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd_packed_ulong8 instead. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(16))) simd_ulong1 simd_ulong8; + +/*! @abstract A scalar 64-bit floating-point number. */ +typedef double simd_double1; + +/*! @abstract A vector of two 64-bit floating-point numbers. + * @description In C++ and Metal, this type is also available as + * simd::double2. The alignment of this type is greater than the alignment + * of double; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_double2 + * instead. */ +typedef __attribute__((__ext_vector_type__(2))) double simd_double2; + +/*! @abstract A vector of three 64-bit floating-point numbers. + * @description In C++ and Metal, this type is also available as + * simd::double3. Note that vectors of this type are padded to have the + * same size and alignment as simd_double4. */ +typedef __attribute__((__ext_vector_type__(3),__aligned__(16))) double simd_double3; + +/*! @abstract A vector of four 64-bit floating-point numbers. + * @description In C++ and Metal, this type is also available as + * simd::double4. The alignment of this type is greater than the alignment + * of double; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd_packed_double4 + * instead. */ +typedef __attribute__((__ext_vector_type__(4),__aligned__(16))) double simd_double4; + +/*! @abstract A vector of eight 64-bit floating-point numbers. + * @description In C++ this type is also available as simd::double8. This + * type is not available in Metal. The alignment of this type is greater + * than the alignment of double; if you need to operate on data buffers + * that may not be suitably aligned, you should access them using + * simd_packed_double8 instead. */ +typedef __attribute__((__ext_vector_type__(8),__aligned__(16))) double simd_double8; + +/* MARK: C++ vector types */ +#if defined __cplusplus +/*! @group C++ and Metal vector types + * @discussion Shorter type names available within the simd:: namespace.
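+ * (Editor's sketch, not original header text, illustrating the + * interchangeability described below; it assumes C++ and the umbrella + * <simd/simd.h> header, and the names v and w are arbitrary: + * + * #include <simd/simd.h> + * simd::float4 v = { 1.0f, 2.0f, 3.0f, 4.0f }; + * ::simd_float4 w = v; // same type; no conversion involved + * )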
+ * + * Each of these types is interchangeable with the corresponding C type + * with the `simd_` prefix. */ +namespace simd { + /*! @abstract A scalar 8-bit signed (twos-complement) integer. + * @discussion In C and Objective-C, this type is available as + * simd_char1. */ +typedef ::simd_char1 char1; + + /*! @abstract A vector of two 8-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_char2. The alignment of this type is greater than the alignment + * of char; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_char2 + * instead. */ +typedef ::simd_char2 char2; + + /*! @abstract A vector of three 8-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_char3. Vectors of this type are padded to have the same size and + * alignment as simd_char4. */ +typedef ::simd_char3 char3; + + /*! @abstract A vector of four 8-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_char4. The alignment of this type is greater than the alignment + * of char; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_char4 + * instead. */ +typedef ::simd_char4 char4; + + /*! @abstract A vector of eight 8-bit signed (twos-complement) integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_char8. The alignment of this type is + * greater than the alignment of char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_char8 instead. */ +typedef ::simd_char8 char8; + + /*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_char16. The alignment of this type is + * greater than the alignment of char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_char16 instead. */ +typedef ::simd_char16 char16; + + /*! @abstract A vector of thirty-two 8-bit signed (twos-complement) + * integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_char32. The alignment of this type is + * greater than the alignment of char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_char32 instead. */ +typedef ::simd_char32 char32; + + /*! @abstract A vector of sixty-four 8-bit signed (twos-complement) + * integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_char64. The alignment of this type is + * greater than the alignment of char; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_char64 instead. */ +typedef ::simd_char64 char64; + + /*! @abstract A scalar 8-bit unsigned integer. + * @discussion In C and Objective-C, this type is available as + * simd_uchar1. */ +typedef ::simd_uchar1 uchar1; + + /*! @abstract A vector of two 8-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_uchar2.
The alignment of this type is greater than the alignment + * of unsigned char; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd::packed_uchar2 + * instead. */ +typedef ::simd_uchar2 uchar2; + + /*! @abstract A vector of three 8-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_uchar3. Vectors of this type are padded to have the same size and + * alignment as simd_uchar4. */ +typedef ::simd_uchar3 uchar3; + + /*! @abstract A vector of four 8-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_uchar4. The alignment of this type is greater than the alignment + * of unsigned char; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd::packed_uchar4 + * instead. */ +typedef ::simd_uchar4 uchar4; + + /*! @abstract A vector of eight 8-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_uchar8. The alignment of this type is + * greater than the alignment of unsigned char; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_uchar8 instead. */ +typedef ::simd_uchar8 uchar8; + + /*! @abstract A vector of sixteen 8-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_uchar16. The alignment of this type is + * greater than the alignment of unsigned char; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_uchar16 instead. */ +typedef ::simd_uchar16 uchar16; + + /*! @abstract A vector of thirty-two 8-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_uchar32. The alignment of this type is + * greater than the alignment of unsigned char; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_uchar32 instead. */ +typedef ::simd_uchar32 uchar32; + + /*! @abstract A vector of sixty-four 8-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_uchar64. The alignment of this type is + * greater than the alignment of unsigned char; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_uchar64 instead. */ +typedef ::simd_uchar64 uchar64; + + /*! @abstract A scalar 16-bit signed (twos-complement) integer. + * @discussion In C and Objective-C, this type is available as + * simd_short1. */ +typedef ::simd_short1 short1; + + /*! @abstract A vector of two 16-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_short2. The alignment of this type is greater than the alignment + * of short; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_short2 + * instead. */ +typedef ::simd_short2 short2; + + /*! @abstract A vector of three 16-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_short3. Vectors of this type are padded to have the same size and + * alignment as simd_short4. */ +typedef ::simd_short3 short3; + + /*!
@abstract A vector of four 16-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_short4. The alignment of this type is greater than the alignment + * of short; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_short4 + * instead. */ +typedef ::simd_short4 short4; + + /*! @abstract A vector of eight 16-bit signed (twos-complement) integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_short8. The alignment of this type is + * greater than the alignment of short; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_short8 instead. */ +typedef ::simd_short8 short8; + + /*! @abstract A vector of sixteen 16-bit signed (twos-complement) + * integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_short16. The alignment of this type is + * greater than the alignment of short; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_short16 instead. */ +typedef ::simd_short16 short16; + + /*! @abstract A vector of thirty-two 16-bit signed (twos-complement) + * integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_short32. The alignment of this type is + * greater than the alignment of short; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_short32 instead. */ +typedef ::simd_short32 short32; + + /*! @abstract A scalar 16-bit unsigned integer. + * @discussion In C and Objective-C, this type is available as + * simd_ushort1. */ +typedef ::simd_ushort1 ushort1; + + /*! @abstract A vector of two 16-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_ushort2. The alignment of this type is greater than the alignment + * of unsigned short; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd::packed_ushort2 + * instead. */ +typedef ::simd_ushort2 ushort2; + + /*! @abstract A vector of three 16-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_ushort3. Vectors of this type are padded to have the same size + * and alignment as simd_ushort4. */ +typedef ::simd_ushort3 ushort3; + + /*! @abstract A vector of four 16-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_ushort4. The alignment of this type is greater than the alignment + * of unsigned short; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd::packed_ushort4 + * instead. */ +typedef ::simd_ushort4 ushort4; + + /*! @abstract A vector of eight 16-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_ushort8. The alignment of this type is + * greater than the alignment of unsigned short; if you need to operate + * on data buffers that may not be suitably aligned, you should access + * them using simd::packed_ushort8 instead. */ +typedef ::simd_ushort8 ushort8; + + /*! @abstract A vector of sixteen 16-bit unsigned integers. + * @description This type is not available in Metal. 
In C or Objective-C, + * this type is available as simd_ushort16. The alignment of this type is + * greater than the alignment of unsigned short; if you need to operate + * on data buffers that may not be suitably aligned, you should access + * them using simd::packed_ushort16 instead. */ +typedef ::simd_ushort16 ushort16; + + /*! @abstract A vector of thirty-two 16-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_ushort32. The alignment of this type is + * greater than the alignment of unsigned short; if you need to operate + * on data buffers that may not be suitably aligned, you should access + * them using simd::packed_ushort32 instead. */ +typedef ::simd_ushort32 ushort32; + + /*! @abstract A scalar 32-bit signed (twos-complement) integer. + * @discussion In C and Objective-C, this type is available as simd_int1. */ +typedef ::simd_int1 int1; + + /*! @abstract A vector of two 32-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as simd_int2. + * The alignment of this type is greater than the alignment of int; if + * you need to operate on data buffers that may not be suitably aligned, + * you should access them using simd::packed_int2 instead. */ +typedef ::simd_int2 int2; + + /*! @abstract A vector of three 32-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as simd_int3. + * Vectors of this type are padded to have the same size and alignment as + * simd_int4. */ +typedef ::simd_int3 int3; + + /*! @abstract A vector of four 32-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as simd_int4. + * The alignment of this type is greater than the alignment of int; if + * you need to operate on data buffers that may not be suitably aligned, + * you should access them using simd::packed_int4 instead. */ +typedef ::simd_int4 int4; + + /*! @abstract A vector of eight 32-bit signed (twos-complement) integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_int8. The alignment of this type is + * greater than the alignment of int; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_int8 instead. */ +typedef ::simd_int8 int8; + + /*! @abstract A vector of sixteen 32-bit signed (twos-complement) + * integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_int16. The alignment of this type is + * greater than the alignment of int; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_int16 instead. */ +typedef ::simd_int16 int16; + + /*! @abstract A scalar 32-bit unsigned integer. + * @discussion In C and Objective-C, this type is available as + * simd_uint1. */ +typedef ::simd_uint1 uint1; + + /*! @abstract A vector of two 32-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_uint2. The alignment of this type is greater than the alignment + * of unsigned int; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd::packed_uint2 + * instead. */ +typedef ::simd_uint2 uint2; + + /*! @abstract A vector of three 32-bit unsigned integers. 
+ * @description In C or Objective-C, this type is available as + * simd_uint3. Vectors of this type are padded to have the same size and + * alignment as simd_uint4. */ +typedef ::simd_uint3 uint3; + + /*! @abstract A vector of four 32-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_uint4. The alignment of this type is greater than the alignment + * of unsigned int; if you need to operate on data buffers that may not + * be suitably aligned, you should access them using simd::packed_uint4 + * instead. */ +typedef ::simd_uint4 uint4; + + /*! @abstract A vector of eight 32-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_uint8. The alignment of this type is + * greater than the alignment of unsigned int; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_uint8 instead. */ +typedef ::simd_uint8 uint8; + + /*! @abstract A vector of sixteen 32-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_uint16. The alignment of this type is + * greater than the alignment of unsigned int; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_uint16 instead. */ +typedef ::simd_uint16 uint16; + + /*! @abstract A scalar 32-bit floating-point number. + * @discussion In C and Objective-C, this type is available as + * simd_float1. */ +typedef ::simd_float1 float1; + + /*! @abstract A vector of two 32-bit floating-point numbers. + * @description In C or Objective-C, this type is available as + * simd_float2. The alignment of this type is greater than the alignment + * of float; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_float2 + * instead. */ +typedef ::simd_float2 float2; + + /*! @abstract A vector of three 32-bit floating-point numbers. + * @description In C or Objective-C, this type is available as + * simd_float3. Vectors of this type are padded to have the same size and + * alignment as simd_float4. */ +typedef ::simd_float3 float3; + + /*! @abstract A vector of four 32-bit floating-point numbers. + * @description In C or Objective-C, this type is available as + * simd_float4. The alignment of this type is greater than the alignment + * of float; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_float4 + * instead. */ +typedef ::simd_float4 float4; + + /*! @abstract A vector of eight 32-bit floating-point numbers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_float8. The alignment of this type is + * greater than the alignment of float; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_float8 instead. */ +typedef ::simd_float8 float8; + + /*! @abstract A vector of sixteen 32-bit floating-point numbers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_float16. The alignment of this type is + * greater than the alignment of float; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_float16 instead. */ +typedef ::simd_float16 float16; + + /*! 
@abstract A scalar 64-bit signed (twos-complement) integer. + * @discussion In C and Objective-C, this type is available as + * simd_long1. */ +typedef ::simd_long1 long1; + + /*! @abstract A vector of two 64-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_long2. The alignment of this type is greater than the alignment + * of simd_long1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_long2 + * instead. */ +typedef ::simd_long2 long2; + + /*! @abstract A vector of three 64-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_long3. Vectors of this type are padded to have the same size and + * alignment as simd_long4. */ +typedef ::simd_long3 long3; + + /*! @abstract A vector of four 64-bit signed (twos-complement) integers. + * @description In C or Objective-C, this type is available as + * simd_long4. The alignment of this type is greater than the alignment + * of simd_long1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_long4 + * instead. */ +typedef ::simd_long4 long4; + + /*! @abstract A vector of eight 64-bit signed (twos-complement) integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_long8. The alignment of this type is + * greater than the alignment of simd_long1; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_long8 instead. */ +typedef ::simd_long8 long8; + + /*! @abstract A scalar 64-bit unsigned integer. + * @discussion In C and Objective-C, this type is available as + * simd_ulong1. */ +typedef ::simd_ulong1 ulong1; + + /*! @abstract A vector of two 64-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_ulong2. The alignment of this type is greater than the alignment + * of simd_ulong1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_ulong2 + * instead. */ +typedef ::simd_ulong2 ulong2; + + /*! @abstract A vector of three 64-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_ulong3. Vectors of this type are padded to have the same size and + * alignment as simd_ulong4. */ +typedef ::simd_ulong3 ulong3; + + /*! @abstract A vector of four 64-bit unsigned integers. + * @description In C or Objective-C, this type is available as + * simd_ulong4. The alignment of this type is greater than the alignment + * of simd_ulong1; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_ulong4 + * instead. */ +typedef ::simd_ulong4 ulong4; + + /*! @abstract A vector of eight 64-bit unsigned integers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_ulong8. The alignment of this type is + * greater than the alignment of simd_ulong1; if you need to operate on + * data buffers that may not be suitably aligned, you should access them + * using simd::packed_ulong8 instead. */ +typedef ::simd_ulong8 ulong8; + + /*! @abstract A scalar 64-bit floating-point number. + * @discussion In C and Objective-C, this type is available as + * simd_double1. */ +typedef ::simd_double1 double1; + + /*! 
@abstract A vector of two 64-bit floating-point numbers. + * @description In C or Objective-C, this type is available as + * simd_double2. The alignment of this type is greater than the alignment + * of double; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_double2 + * instead. */ +typedef ::simd_double2 double2; + + /*! @abstract A vector of three 64-bit floating-point numbers. + * @description In C or Objective-C, this type is available as + * simd_double3. Vectors of this type are padded to have the same size + * and alignment as simd_double4. */ +typedef ::simd_double3 double3; + + /*! @abstract A vector of four 64-bit floating-point numbers. + * @description In C or Objective-C, this type is available as + * simd_double4. The alignment of this type is greater than the alignment + * of double; if you need to operate on data buffers that may not be + * suitably aligned, you should access them using simd::packed_double4 + * instead. */ +typedef ::simd_double4 double4; + + /*! @abstract A vector of eight 64-bit floating-point numbers. + * @description This type is not available in Metal. In C or Objective-C, + * this type is available as simd_double8. The alignment of this type is + * greater than the alignment of double; if you need to operate on data + * buffers that may not be suitably aligned, you should access them using + * simd::packed_double8 instead. */ +typedef ::simd_double8 double8; + +} /* namespace simd:: */ +#endif /* __cplusplus */ + +/* MARK: Deprecated vector types */ +/*! @group Deprecated vector types + * @discussion These are the original types used by earlier versions of the + * simd library; they are provided here for compatibility with existing source + * files. Use the new ("simd_"-prefixed) types for future development. */ + +/*! @abstract A vector of two 8-bit signed (twos-complement) integers. + * @description This type is deprecated; you should use simd_char2 or + * simd::char2 instead. */ +typedef simd_char2 vector_char2; + +/*! @abstract A vector of three 8-bit signed (twos-complement) integers. + * @description This type is deprecated; you should use simd_char3 or + * simd::char3 instead. */ +typedef simd_char3 vector_char3; + +/*! @abstract A vector of four 8-bit signed (twos-complement) integers. + * @description This type is deprecated; you should use simd_char4 or + * simd::char4 instead. */ +typedef simd_char4 vector_char4; + +/*! @abstract A vector of eight 8-bit signed (twos-complement) integers. + * @description This type is deprecated; you should use simd_char8 or + * simd::char8 instead. */ +typedef simd_char8 vector_char8; + +/*! @abstract A vector of sixteen 8-bit signed (twos-complement) integers. + * @description This type is deprecated; you should use simd_char16 or + * simd::char16 instead. */ +typedef simd_char16 vector_char16; + +/*! @abstract A vector of thirty-two 8-bit signed (twos-complement) + * integers. + * @description This type is deprecated; you should use simd_char32 or + * simd::char32 instead. */ +typedef simd_char32 vector_char32; + +/*! @abstract A vector of two 8-bit unsigned integers. + * @description This type is deprecated; you should use simd_uchar2 or + * simd::uchar2 instead. */ +typedef simd_uchar2 vector_uchar2; + +/*! @abstract A vector of three 8-bit unsigned integers. + * @description This type is deprecated; you should use simd_uchar3 or + * simd::uchar3 instead. */ +typedef simd_uchar3 vector_uchar3; + +/*!
@abstract A vector of four 8-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uchar4 or
+ * simd::uchar4 instead. */
+typedef simd_uchar4 vector_uchar4;
+
+/*! @abstract A vector of eight 8-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uchar8 or
+ * simd::uchar8 instead. */
+typedef simd_uchar8 vector_uchar8;
+
+/*! @abstract A vector of sixteen 8-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uchar16 or
+ * simd::uchar16 instead. */
+typedef simd_uchar16 vector_uchar16;
+
+/*! @abstract A vector of thirty-two 8-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uchar32 or
+ * simd::uchar32 instead. */
+typedef simd_uchar32 vector_uchar32;
+
+/*! @abstract A vector of two 16-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_short2 or
+ * simd::short2 instead. */
+typedef simd_short2 vector_short2;
+
+/*! @abstract A vector of three 16-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_short3 or
+ * simd::short3 instead. */
+typedef simd_short3 vector_short3;
+
+/*! @abstract A vector of four 16-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_short4 or
+ * simd::short4 instead. */
+typedef simd_short4 vector_short4;
+
+/*! @abstract A vector of eight 16-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_short8 or
+ * simd::short8 instead. */
+typedef simd_short8 vector_short8;
+
+/*! @abstract A vector of sixteen 16-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_short16 or
+ * simd::short16 instead. */
+typedef simd_short16 vector_short16;
+
+/*! @abstract A vector of thirty-two 16-bit signed (twos-complement)
+ * integers.
+ * @description This type is deprecated; you should use simd_short32 or
+ * simd::short32 instead. */
+typedef simd_short32 vector_short32;
+
+/*! @abstract A vector of two 16-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ushort2 or
+ * simd::ushort2 instead. */
+typedef simd_ushort2 vector_ushort2;
+
+/*! @abstract A vector of three 16-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ushort3 or
+ * simd::ushort3 instead. */
+typedef simd_ushort3 vector_ushort3;
+
+/*! @abstract A vector of four 16-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ushort4 or
+ * simd::ushort4 instead. */
+typedef simd_ushort4 vector_ushort4;
+
+/*! @abstract A vector of eight 16-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ushort8 or
+ * simd::ushort8 instead. */
+typedef simd_ushort8 vector_ushort8;
+
+/*! @abstract A vector of sixteen 16-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ushort16 or
+ * simd::ushort16 instead. */
+typedef simd_ushort16 vector_ushort16;
+
+/*! @abstract A vector of thirty-two 16-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ushort32 or
+ * simd::ushort32 instead. */
+typedef simd_ushort32 vector_ushort32;
+
+/*! @abstract A vector of two 32-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_int2 or
+ * simd::int2 instead. */
+typedef simd_int2 vector_int2;
+
+/*! @abstract A vector of three 32-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_int3 or
+ * simd::int3 instead. */
+typedef simd_int3 vector_int3;
+
+/*! @abstract A vector of four 32-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_int4 or
+ * simd::int4 instead. */
+typedef simd_int4 vector_int4;
+
+/*! @abstract A vector of eight 32-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_int8 or
+ * simd::int8 instead. */
+typedef simd_int8 vector_int8;
+
+/*! @abstract A vector of sixteen 32-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_int16 or
+ * simd::int16 instead. */
+typedef simd_int16 vector_int16;
+
+/*! @abstract A vector of two 32-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uint2 or
+ * simd::uint2 instead. */
+typedef simd_uint2 vector_uint2;
+
+/*! @abstract A vector of three 32-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uint3 or
+ * simd::uint3 instead. */
+typedef simd_uint3 vector_uint3;
+
+/*! @abstract A vector of four 32-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uint4 or
+ * simd::uint4 instead. */
+typedef simd_uint4 vector_uint4;
+
+/*! @abstract A vector of eight 32-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uint8 or
+ * simd::uint8 instead. */
+typedef simd_uint8 vector_uint8;
+
+/*! @abstract A vector of sixteen 32-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_uint16 or
+ * simd::uint16 instead. */
+typedef simd_uint16 vector_uint16;
+
+/*! @abstract A vector of two 32-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_float2 or
+ * simd::float2 instead. */
+typedef simd_float2 vector_float2;
+
+/*! @abstract A vector of three 32-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_float3 or
+ * simd::float3 instead. */
+typedef simd_float3 vector_float3;
+
+/*! @abstract A vector of four 32-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_float4 or
+ * simd::float4 instead. */
+typedef simd_float4 vector_float4;
+
+/*! @abstract A vector of eight 32-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_float8 or
+ * simd::float8 instead. */
+typedef simd_float8 vector_float8;
+
+/*! @abstract A vector of sixteen 32-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_float16 or
+ * simd::float16 instead. */
+typedef simd_float16 vector_float16;
+
+/*! @abstract A scalar 64-bit signed (twos-complement) integer.
+ * @description This type is deprecated; you should use simd_long1 or
+ * simd::long1 instead. */
+typedef simd_long1 vector_long1;
+
+/*! @abstract A vector of two 64-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_long2 or
+ * simd::long2 instead. */
+typedef simd_long2 vector_long2;
+
+/*! @abstract A vector of three 64-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_long3 or
+ * simd::long3 instead. */
+typedef simd_long3 vector_long3;
+
+/*! @abstract A vector of four 64-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_long4 or
+ * simd::long4 instead. */
+typedef simd_long4 vector_long4;
+
+/*! @abstract A vector of eight 64-bit signed (twos-complement) integers.
+ * @description This type is deprecated; you should use simd_long8 or
+ * simd::long8 instead. */
+typedef simd_long8 vector_long8;
+
+/*! @abstract A scalar 64-bit unsigned integer.
+ * @description This type is deprecated; you should use simd_ulong1 or
+ * simd::ulong1 instead. */
+typedef simd_ulong1 vector_ulong1;
+
+/*! @abstract A vector of two 64-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ulong2 or
+ * simd::ulong2 instead. */
+typedef simd_ulong2 vector_ulong2;
+
+/*! @abstract A vector of three 64-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ulong3 or
+ * simd::ulong3 instead. */
+typedef simd_ulong3 vector_ulong3;
+
+/*! @abstract A vector of four 64-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ulong4 or
+ * simd::ulong4 instead. */
+typedef simd_ulong4 vector_ulong4;
+
+/*! @abstract A vector of eight 64-bit unsigned integers.
+ * @description This type is deprecated; you should use simd_ulong8 or
+ * simd::ulong8 instead. */
+typedef simd_ulong8 vector_ulong8;
+
+/*! @abstract A vector of two 64-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_double2 or
+ * simd::double2 instead. */
+typedef simd_double2 vector_double2;
+
+/*! @abstract A vector of three 64-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_double3 or
+ * simd::double3 instead. */
+typedef simd_double3 vector_double3;
+
+/*! @abstract A vector of four 64-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_double4 or
+ * simd::double4 instead. */
+typedef simd_double4 vector_double4;
+
+/*! @abstract A vector of eight 64-bit floating-point numbers.
+ * @description This type is deprecated; you should use simd_double8 or
+ * simd::double8 instead. */
+typedef simd_double8 vector_double8;
+
+# endif /* SIMD_COMPILER_HAS_REQUIRED_FEATURES */
+#endif
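The vfsoverlay/*.h files added above are vendored copies of Apple's <simd/simd.h> module headers; the overlay.yaml added alongside them lets Clang substitute these patched copies for the SDK's own headers, which is how the _Float16 error on the iosX64 target is avoided. The build change below wires the overlay into the CocoaPods cinterop via extraOpts. For a project that binds WebRTC through a plain cinterop rather than the CocoaPods plugin, the same overlay could plausibly be wired in as sketched here; this is an illustration only, and the cinterop name and .def file path are assumptions, not part of this patch:

    kotlin {
        iosX64 {
            compilations.getByName("main") {
                // Hypothetical cinterop binding; only the -ivfsoverlay option
                // mirrors what this patch does for the CocoaPods integration.
                cinterops.create("WebRTC") {
                    defFile(project.file("src/nativeInterop/cinterop/WebRTC.def"))
                    // Route Clang through the patched simd headers.
                    compilerOpts("-ivfsoverlay", "$rootDir/vfsoverlay/overlay.yaml")
                }
            }
        }
    }
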
diff --git a/webrtc-kmp/build.gradle.kts b/webrtc-kmp/build.gradle.kts
index b1148f53..8fad17da 100644
--- a/webrtc-kmp/build.gradle.kts
+++ b/webrtc-kmp/build.gradle.kts
@@ -1,10 +1,12 @@
 import org.jetbrains.kotlin.gradle.ExperimentalKotlinGradlePluginApi
+import org.jetbrains.kotlin.gradle.ExperimentalWasmDsl
+import org.jetbrains.kotlin.gradle.dsl.JvmTarget
 import org.jetbrains.kotlin.gradle.plugin.KotlinSourceSetTree
-import org.jetbrains.kotlin.gradle.targets.js.dsl.ExperimentalWasmDsl
 import org.jetbrains.kotlin.gradle.targets.js.webpack.KotlinWebpackConfig
 
 plugins {
-    id("webrtc.multiplatform")
+    alias(libs.plugins.kotlinMultiplatform)
+    alias(libs.plugins.androidLibrary)
     kotlin("native.cocoapods")
     id("maven-publish")
     id("signing")
@@ -15,6 +17,11 @@ group = "com.shepeliev"
 version = System.getenv("VERSION") ?: "0.0.0"
 
 kotlin {
+    @OptIn(ExperimentalKotlinGradlePluginApi::class)
+    compilerOptions {
+        freeCompilerArgs.add("-Xexpect-actual-classes")
+    }
+
     cocoapods {
         version = project.version.toString()
         summary = "WebRTC Kotlin Multiplatform SDK"
@@ -27,15 +34,21 @@ kotlin {
             version = libs.versions.webrtc.ios.sdk.get()
             moduleName = "WebRTC"
             packageName = "WebRTC"
+
+            // workaround for https://youtrack.jetbrains.com/issue/KT-69094
+            extraOpts += listOf("-compiler-option", "-ivfsoverlay", "-compiler-option", "../vfsoverlay/overlay.yaml")
         }
     }
 
+    @OptIn(ExperimentalKotlinGradlePluginApi::class)
     androidTarget {
         publishAllLibraryVariants()
 
-        @OptIn(ExperimentalKotlinGradlePluginApi::class)
         instrumentedTestVariant {
             sourceSetTree.set(KotlinSourceSetTree.test)
         }
+
+        compilerOptions {
+            jvmTarget = JvmTarget.JVM_1_8
+        }
     }
 
     iosX64()
@@ -108,11 +121,24 @@ kotlin {
 
 android {
     namespace = "com.shepeliev.webrtckmp"
+    compileSdk = libs.versions.compileSdk.get().toInt()
+
+    sourceSets["main"].manifest.srcFile("src/androidMain/AndroidManifest.xml")
+    sourceSets["main"].res.srcDir("src/androidMain/res")
+
     defaultConfig {
-        targetSdk = libs.versions.targetSdk.get().toInt()
+        minSdk = libs.versions.minSdk.get().toInt()
         testInstrumentationRunner = "androidx.test.runner.AndroidJUnitRunner"
     }
 
+    compileOptions {
+        sourceCompatibility = JavaVersion.VERSION_1_8
+        targetCompatibility = JavaVersion.VERSION_1_8
+    }
+
+    testOptions {
+        targetSdk = libs.versions.targetSdk.get().toInt()
+    }
+
    dependencies {
        androidTestImplementation(libs.androidx.test.core)
        androidTestImplementation(libs.androidx.test.runner)
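
A note on the androidTarget block above: Kotlin 2.x retires the old kotlinOptions DSL in favor of compilerOptions, and the target-level compilerOptions form used here still requires opting in to ExperimentalKotlinGradlePluginApi, which is presumably why the @OptIn moved up from instrumentedTestVariant to the target itself. A minimal before/after sketch of that migration, as a standalone illustration rather than part of the patch:

    import org.jetbrains.kotlin.gradle.ExperimentalKotlinGradlePluginApi
    import org.jetbrains.kotlin.gradle.dsl.JvmTarget

    kotlin {
        @OptIn(ExperimentalKotlinGradlePluginApi::class)
        androidTarget {
            // Deprecated Kotlin 1.x spelling:
            // compilations.all { kotlinOptions.jvmTarget = "1.8" }

            // Kotlin 2.x replacement, as used in this patch:
            compilerOptions {
                jvmTarget.set(JvmTarget.JVM_1_8)
            }
        }
    }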