From 988e09f27b9b04a43d45d10f92782e0092ee27a9 Mon Sep 17 00:00:00 2001 From: Jia Tan Date: Fri, 20 Oct 2023 19:17:46 +0800 Subject: [PATCH] liblzma: Move is_clmul_supported() back to crc_common.h. This partially reverts creating crc_clmul.c (8c0f9376f58c0696d5d6719705164d35542dd891) where is_clmul_supported() was moved, extern'ed, and renamed to lzma_is_clmul_supported(). This caused a problem when the function call to lzma_is_clmul_supported() results in a call through the PLT. ifunc resolvers run very early in the dynamic loading sequence, so the PLT may not be setup properly at this point. Whether the PLT is used or not for lzma_is_clmul_supported() depened upon the compiler-toolchain used and flags. In liblzma compiled with GCC, for instance, GCC will go through the PLT for function calls internal to liblzma if the version scripts and symbol visibility hiding are not used. If lazy-binding is disabled, then it would have made any program linked with liblzma fail during dynamic loading in the ifunc resolver. --- src/liblzma/check/crc32_fast.c | 2 +- src/liblzma/check/crc64_fast.c | 2 +- src/liblzma/check/crc_clmul.c | 45 ----------------------------- src/liblzma/check/crc_common.h | 52 ++++++++++++++++++++++++++++++++-- 4 files changed, 51 insertions(+), 50 deletions(-) diff --git a/src/liblzma/check/crc32_fast.c b/src/liblzma/check/crc32_fast.c index add93d55a..736590497 100644 --- a/src/liblzma/check/crc32_fast.c +++ b/src/liblzma/check/crc32_fast.c @@ -130,7 +130,7 @@ typedef uint32_t (*crc32_func_type)( static crc32_func_type crc32_resolve(void) { - return lzma_is_clmul_supported() ? &lzma_crc32_clmul : &crc32_generic; + return is_clmul_supported() ? &lzma_crc32_clmul : &crc32_generic; } #if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__) diff --git a/src/liblzma/check/crc64_fast.c b/src/liblzma/check/crc64_fast.c index 8acdc7135..4e6633db5 100644 --- a/src/liblzma/check/crc64_fast.c +++ b/src/liblzma/check/crc64_fast.c @@ -94,7 +94,7 @@ typedef uint64_t (*crc64_func_type)( static crc64_func_type crc64_resolve(void) { - return lzma_is_clmul_supported() ? &lzma_crc64_clmul : &crc64_generic; + return is_clmul_supported() ? &lzma_crc64_clmul : &crc64_generic; } #if defined(HAVE_FUNC_ATTRIBUTE_IFUNC) && defined(__clang__) diff --git a/src/liblzma/check/crc_clmul.c b/src/liblzma/check/crc_clmul.c index 7110fd7ef..640415e7c 100644 --- a/src/liblzma/check/crc_clmul.c +++ b/src/liblzma/check/crc_clmul.c @@ -372,48 +372,3 @@ lzma_crc64_clmul(const uint8_t *buf, size_t size, uint64_t crc) && defined(_M_IX86) # pragma optimize("", on) #endif - - -//////////////////////// -// Detect CPU support // -//////////////////////// - -extern bool -lzma_is_clmul_supported(void) -{ - int success = 1; - uint32_t r[4]; // eax, ebx, ecx, edx - -#if defined(_MSC_VER) - // This needs with MSVC. ICC has it as a built-in - // on all platforms. - __cpuid(r, 1); -#elif defined(HAVE_CPUID_H) - // Compared to just using __asm__ to run CPUID, this also checks - // that CPUID is supported and saves and restores ebx as that is - // needed with GCC < 5 with position-independent code (PIC). - success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); -#else - // Just a fallback that shouldn't be needed. - __asm__("cpuid\n\t" - : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) - : "a"(1), "c"(0)); -#endif - - // Returns true if these are supported: - // CLMUL (bit 1 in ecx) - // SSSE3 (bit 9 in ecx) - // SSE4.1 (bit 19 in ecx) - const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); - return success && (r[2] & ecx_mask) == ecx_mask; - - // Alternative methods that weren't used: - // - ICC's _may_i_use_cpu_feature: the other methods should work too. - // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") - // - // CPUID decding is needed with MSVC anyway and older GCC. This keeps - // the feature checks in the build system simpler too. The nice thing - // about __builtin_cpu_supports would be that it generates very short - // code as is it only reads a variable set at startup but a few bytes - // doesn't matter here. -} diff --git a/src/liblzma/check/crc_common.h b/src/liblzma/check/crc_common.h index 51ddd9d5c..1783b5e76 100644 --- a/src/liblzma/check/crc_common.h +++ b/src/liblzma/check/crc_common.h @@ -99,11 +99,57 @@ # elif defined(HAVE_CPUID_H) # include # endif + +// is_clmul_supported() must be inlined in this header file because the +// ifunc resolver function may not support calling a function in another +// translation unit. Depending on compiler-toolchain and flags, a call to +// a function defined in another translation unit could result in a +// reference to the PLT, which is unsafe to do in an ifunc resolver. The +// ifunc resolver runs very early when loading a shared library, so the PLT +// entries may not be setup at that time. Inlining this function duplicates +// the function body in crc32_resolve() and crc64_resolve(), but this is +// acceptable because the function results in very few instructions. +static inline bool +is_clmul_supported(void) +{ + int success = 1; + uint32_t r[4]; // eax, ebx, ecx, edx + +#if defined(_MSC_VER) + // This needs with MSVC. ICC has it as a built-in + // on all platforms. + __cpuid(r, 1); +#elif defined(HAVE_CPUID_H) + // Compared to just using __asm__ to run CPUID, this also checks + // that CPUID is supported and saves and restores ebx as that is + // needed with GCC < 5 with position-independent code (PIC). + success = __get_cpuid(1, &r[0], &r[1], &r[2], &r[3]); +#else + // Just a fallback that shouldn't be needed. + __asm__("cpuid\n\t" + : "=a"(r[0]), "=b"(r[1]), "=c"(r[2]), "=d"(r[3]) + : "a"(1), "c"(0)); #endif -/// Detect at runtime if the CPU supports the x86 CLMUL instruction when -/// both the generic and CLMUL implementations are built. -extern bool lzma_is_clmul_supported(void); + // Returns true if these are supported: + // CLMUL (bit 1 in ecx) + // SSSE3 (bit 9 in ecx) + // SSE4.1 (bit 19 in ecx) + const uint32_t ecx_mask = (1 << 1) | (1 << 9) | (1 << 19); + return success && (r[2] & ecx_mask) == ecx_mask; + + // Alternative methods that weren't used: + // - ICC's _may_i_use_cpu_feature: the other methods should work too. + // - GCC >= 6 / Clang / ICX __builtin_cpu_supports("pclmul") + // + // CPUID decding is needed with MSVC anyway and older GCC. This keeps + // the feature checks in the build system simpler too. The nice thing + // about __builtin_cpu_supports would be that it generates very short + // code as is it only reads a variable set at startup but a few bytes + // doesn't matter here. +} + +#endif /// CRC32 implemented with the x86 CLMUL instruction. extern uint32_t lzma_crc32_clmul(const uint8_t *buf, size_t size,