This is an automated email from the ASF dual-hosted git repository.
pitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/main by this push:
new ca47cd10b6 GH-45331: [C++] Use xsimd for CPU feature detection (#49940)
ca47cd10b6 is described below
commit ca47cd10b651a8ff5fe44fbb23f7e01dc982a57d
Author: Antoine Prouvost <[email protected]>
AuthorDate: Wed Jun 10 10:53:34 2026 +0200
GH-45331: [C++] Use xsimd for CPU feature detection (#49940)
### Rationale for this change
Use xsimd cpu feature instead of maitaining them here.
Stacked on GH-49922.
Should also handle issue from GH-30368.
### What changes are included in this PR?
- Use xsimd for CPu feature detection, cache sizes are still using the same
implementation
- We are loosing CPU model name (which was unused)
- Simplify CpuInfo (remove Pimpl pattern)
### Are these changes tested?
in CI
### Are there any user-facing changes?
No
* GitHub Issue: #45331
Authored-by: AntoinePrv <[email protected]>
Signed-off-by: Antoine Pitrou <[email protected]>
---
cpp/cmake_modules/ThirdpartyToolchain.cmake | 34 +-
cpp/src/arrow/CMakeLists.txt | 30 +-
cpp/src/arrow/util/CMakeLists.txt | 8 +-
cpp/src/arrow/util/cpu_info.cc | 468 ++++++++--------------------
cpp/src/arrow/util/cpu_info.h | 35 ++-
cpp/src/gandiva/CMakeLists.txt | 6 +-
cpp/src/parquet/CMakeLists.txt | 8 +-
7 files changed, 181 insertions(+), 408 deletions(-)
diff --git a/cpp/cmake_modules/ThirdpartyToolchain.cmake
b/cpp/cmake_modules/ThirdpartyToolchain.cmake
index fd1752928e..9a03030115 100644
--- a/cpp/cmake_modules/ThirdpartyToolchain.cmake
+++ b/cpp/cmake_modules/ThirdpartyToolchain.cmake
@@ -2844,28 +2844,20 @@ macro(build_xsimd)
set(XSIMD_VENDORED TRUE)
endmacro()
-if((NOT ARROW_SIMD_LEVEL STREQUAL "NONE") OR (NOT ARROW_RUNTIME_SIMD_LEVEL
STREQUAL "NONE"
- ))
- set(ARROW_USE_XSIMD TRUE)
+# Xsimd is mandatory as its CPU feature detection is the basis for Arrow
CpuInfo
+resolve_dependency(xsimd
+ FORCE_ANY_NEWER_VERSION
+ TRUE
+ IS_RUNTIME_DEPENDENCY
+ FALSE
+ REQUIRED_VERSION
+ "14.2.0")
+
+if(xsimd_SOURCE STREQUAL "BUNDLED")
+ set(ARROW_XSIMD arrow::xsimd)
else()
- set(ARROW_USE_XSIMD FALSE)
-endif()
-
-if(ARROW_USE_XSIMD)
- resolve_dependency(xsimd
- FORCE_ANY_NEWER_VERSION
- TRUE
- IS_RUNTIME_DEPENDENCY
- FALSE
- REQUIRED_VERSION
- "14.2.0")
-
- if(xsimd_SOURCE STREQUAL "BUNDLED")
- set(ARROW_XSIMD arrow::xsimd)
- else()
- message(STATUS "xsimd found. Headers: ${xsimd_INCLUDE_DIRS}")
- set(ARROW_XSIMD xsimd)
- endif()
+ message(STATUS "xsimd found. Headers: ${xsimd_INCLUDE_DIRS}")
+ set(ARROW_XSIMD xsimd)
endif()
macro(build_zlib)
diff --git a/cpp/src/arrow/CMakeLists.txt b/cpp/src/arrow/CMakeLists.txt
index 45cd7e8381..8d3cf9682a 100644
--- a/cpp/src/arrow/CMakeLists.txt
+++ b/cpp/src/arrow/CMakeLists.txt
@@ -601,6 +601,9 @@ arrow_add_object_library(ARROW_UTIL ${ARROW_UTIL_SRCS})
foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
target_compile_definitions(${ARROW_UTIL_TARGET} PRIVATE URI_STATIC_BUILD)
endforeach()
+foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
+ target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD})
+endforeach()
if(ARROW_USE_BOOST)
foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
@@ -617,11 +620,6 @@ if(ARROW_USE_OPENSSL)
target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_OPENSSL_LIBS})
endforeach()
endif()
-if(ARROW_USE_XSIMD)
- foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
- target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_XSIMD})
- endforeach()
-endif()
if(ARROW_WITH_BROTLI)
foreach(ARROW_UTIL_TARGET ${ARROW_UTIL_TARGETS})
target_link_libraries(${ARROW_UTIL_TARGET} PRIVATE ${ARROW_BROTLI_LIBS})
@@ -734,11 +732,9 @@ if(ARROW_CSV)
csv/parser.cc
csv/reader.cc
csv/writer.cc)
- if(ARROW_USE_XSIMD)
- foreach(ARROW_CSV_TARGET ${ARROW_CSV_TARGETS})
- target_link_libraries(${ARROW_CSV_TARGET} PRIVATE ${ARROW_XSIMD})
- endforeach()
- endif()
+ foreach(ARROW_CSV_TARGET ${ARROW_CSV_TARGETS})
+ target_link_libraries(${ARROW_CSV_TARGET} PRIVATE ${ARROW_XSIMD})
+ endforeach()
list(APPEND ARROW_TESTING_SRCS csv/test_common.cc)
else()
@@ -855,15 +851,13 @@ if(ARROW_COMPUTE)
list(APPEND ARROW_COMPUTE_SHARED_INSTALL_INTERFACE_LIBS Arrow::arrow_shared)
list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS arrow_static)
list(APPEND ARROW_COMPUTE_SHARED_LINK_LIBS arrow_shared)
+ list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_XSIMD})
+ list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD})
if(ARROW_USE_BOOST)
list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS Boost::headers)
list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS Boost::headers)
endif()
- if(ARROW_USE_XSIMD)
- list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_XSIMD})
- list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD})
- endif()
if(ARROW_WITH_OPENTELEMETRY)
list(APPEND ARROW_COMPUTE_STATIC_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS})
list(APPEND ARROW_COMPUTE_SHARED_PRIVATE_LINK_LIBS
${ARROW_OPENTELEMETRY_LIBS})
@@ -910,11 +904,9 @@ endif()
arrow_add_object_library(ARROW_COMPUTE_CORE ${ARROW_COMPUTE_SRCS})
-if(ARROW_USE_XSIMD)
- foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS})
- target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE ${ARROW_XSIMD})
- endforeach()
-endif()
+foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS})
+ target_link_libraries(${ARROW_COMPUTE_CORE_TARGET} PRIVATE ${ARROW_XSIMD})
+endforeach()
if(ARROW_WITH_OPENTELEMETRY)
foreach(ARROW_COMPUTE_CORE_TARGET ${ARROW_COMPUTE_CORE_TARGETS})
target_link_libraries(${ARROW_COMPUTE_CORE_TARGET}
diff --git a/cpp/src/arrow/util/CMakeLists.txt
b/cpp/src/arrow/util/CMakeLists.txt
index deb3e9e3fb..8b05b4b6e6 100644
--- a/cpp/src/arrow/util/CMakeLists.txt
+++ b/cpp/src/arrow/util/CMakeLists.txt
@@ -39,9 +39,7 @@ else()
endif()
set(ARROW_UTILITY_TEST_LINK_LIBS Boost::headers)
-if(ARROW_USE_XSIMD)
- list(APPEND ARROW_UTILITY_TEST_LINK_LIBS ${ARROW_XSIMD})
-endif()
+list(APPEND ARROW_UTILITY_TEST_LINK_LIBS ${ARROW_XSIMD})
if(ARROW_WITH_OPENTELEMETRY)
list(APPEND ARROW_UTILITY_TEST_LINK_LIBS ${ARROW_OPENTELEMETRY_LIBS})
endif()
@@ -135,9 +133,7 @@ add_arrow_benchmark(tdigest_benchmark)
add_arrow_benchmark(thread_pool_benchmark)
add_arrow_benchmark(trie_benchmark)
set(ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS)
-if(ARROW_USE_XSIMD)
- list(APPEND ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS ${ARROW_XSIMD})
-endif()
+list(APPEND ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS ${ARROW_XSIMD})
add_arrow_benchmark(utf8_util_benchmark EXTRA_LINK_LIBS
${ARROW_BENCHMARK_UTF8_UTIL_LINK_LIBS})
add_arrow_benchmark(value_parsing_benchmark)
diff --git a/cpp/src/arrow/util/cpu_info.cc b/cpp/src/arrow/util/cpu_info.cc
index 2cc1ac802b..2f7cd2408d 100644
--- a/cpp/src/arrow/util/cpu_info.cc
+++ b/cpp/src/arrow/util/cpu_info.cc
@@ -17,64 +17,107 @@
// From Apache Impala (incubating) as of 2016-01-29.
-#include "arrow/util/cpu_info.h"
-
-#ifdef __APPLE__
-# include <sys/sysctl.h>
-#endif
-
-#ifndef _MSC_VER
-# include <unistd.h>
-#endif
-
-#ifdef _WIN32
-# include <intrin.h>
-
-# include "arrow/util/windows_compatibility.h"
-#endif
-
#include <algorithm>
#include <array>
-#include <bitset>
#include <cctype>
#include <cerrno>
#include <cstdint>
-#include <fstream>
-#include <memory>
#include <optional>
#include <string>
#include <thread>
+#include <xsimd/xsimd.hpp>
+
#include "arrow/result.h"
+#include "arrow/util/cpu_info.h"
#include "arrow/util/io_util.h"
#include "arrow/util/logging_internal.h"
#include "arrow/util/string.h"
-#undef CPUINFO_ARCH_X86
-#undef CPUINFO_ARCH_ARM
-#undef CPUINFO_ARCH_PPC
-
-#if defined(__i386) || defined(_M_IX86) || defined(__x86_64__) ||
defined(_M_X64)
-# define CPUINFO_ARCH_X86
-#elif defined(_M_ARM64) || defined(__aarch64__) || defined(__arm64__)
-# define CPUINFO_ARCH_ARM
-#elif defined(__PPC64__) || defined(__PPC64LE__) || defined(__ppc64__) || \
- defined(__powerpc64__)
-# define CPUINFO_ARCH_PPC
+#ifdef __linux__
+# include <fstream>
#endif
-namespace arrow {
-namespace internal {
+#ifdef __APPLE__
+# include <sys/sysctl.h>
+#endif
+
+#ifndef _MSC_VER
+# include <unistd.h>
+#endif
+
+#ifdef _WIN32
+# include <intrin.h>
+
+# include "arrow/util/windows_compatibility.h"
+#endif
+
+namespace arrow::internal {
namespace {
-constexpr int kCacheLevels = static_cast<int>(CpuInfo::CacheLevel::Last) + 1;
+void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor) {
+ const auto cpu = xsimd::cpu_features();
+
+ *hardware_flags |= cpu.popcnt() ? CpuInfo::POPCNT : 0;
+ *hardware_flags |= cpu.bmi1() ? CpuInfo::BMI1 : 0;
+ *hardware_flags |= cpu.bmi2() ? CpuInfo::BMI2 : 0;
+
+ // SSE
+ *hardware_flags |= cpu.ssse3() ? CpuInfo::SSSE3 : 0;
+ *hardware_flags |= cpu.sse4_1() ? CpuInfo::SSE4_1 : 0;
+ *hardware_flags |= cpu.sse4_2() ? CpuInfo::SSE4_2 : 0;
+ // AVX
+ *hardware_flags |= cpu.avx() ? CpuInfo::AVX : 0;
+ *hardware_flags |= cpu.avx2() ? CpuInfo::AVX2 : 0;
+ // AVX 512
+ const bool avx512f = cpu.avx512f();
+ *hardware_flags |= cpu.avx512f() ? CpuInfo::AVX512F : 0;
+ *hardware_flags |= cpu.avx512cd() ? CpuInfo::AVX512CD : 0;
+ *hardware_flags |= cpu.avx512dq() ? CpuInfo::AVX512DQ : 0;
+ *hardware_flags |= cpu.avx512bw() ? CpuInfo::AVX512BW : 0;
+ // TODO(xsimd): Missing in xsimd 14.2.0 but fixed afterwards.
+ // Can be replaced with the following (no `if(avx512f)` required).
+ // *hardware_flags |= cpu.avx512vl() ? CpuInfo::AVX512VL : 0;
+ if (avx512f) {
+ const auto cpu_x86 = xsimd::x86_cpu_features_backend_default();
+ auto constexpr avx512vl = static_cast<xsimd::x86_cpuid_leaf7::ebx>(31);
+ *hardware_flags |= cpu_x86.leaf7().all_bits_set<avx512vl>() ?
CpuInfo::AVX512VL : 0;
+ }
+
+ // Neon
+ *hardware_flags |= cpu.neon64() ? CpuInfo::ASIMD : 0;
+ // SVE and length
+ // Running SVE128 on a SVE256 machine is more tricky than the x86 equivalent
of
+ // running SSE code on an AVX machine and requires to explicitly change the
+ // vector length using `prctl` (per thread setting).
+ const bool sve = cpu.sve();
+ const auto sve_size = cpu.sve_size_bytes();
+ *hardware_flags |= sve ? CpuInfo::SVE : 0;
+ *hardware_flags |= (sve && sve_size == 16) ? CpuInfo::SVE128 : 0;
+ *hardware_flags |= (sve && sve_size == 32) ? CpuInfo::SVE256 : 0;
+ *hardware_flags |= (sve && sve_size == 64) ? CpuInfo::SVE512 : 0;
+
+ // x86 only
+ switch (cpu.known_manufacturer()) {
+ case (xsimd::x86_manufacturer::intel):
+ *vendor = CpuInfo::Vendor::Intel;
+ break;
+ case (xsimd::x86_manufacturer::amd):
+ *vendor = CpuInfo::Vendor::AMD;
+ break;
+ default: {
+ }
+ }
+}
//============================== OS Dependent ==============================//
#if defined(_WIN32)
//------------------------------ WINDOWS ------------------------------//
-void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
+template <std::size_t N>
+void OsRetrieveCacheSize(std::array<int64_t, N>* cache_sizes) {
+ static_assert(N >= 3);
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer = nullptr;
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer_position = nullptr;
DWORD buffer_size = 0;
@@ -110,7 +153,8 @@ void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>*
cache_sizes) {
while (offset + sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION) <= buffer_size)
{
if (RelationCache == buffer_position->Relationship) {
PCACHE_DESCRIPTOR cache = &buffer_position->Cache;
- if (cache->Level >= 1 && cache->Level <= kCacheLevels) {
+ using level_t = decltype(cache->Level);
+ if (cache->Level >= 1 && cache->Level <= static_cast<level_t>(N)) {
const int64_t current = (*cache_sizes)[cache->Level - 1];
(*cache_sizes)[cache->Level - 1] = std::max<int64_t>(current,
cache->Size);
}
@@ -122,111 +166,6 @@ void OsRetrieveCacheSize(std::array<int64_t,
kCacheLevels>* cache_sizes) {
free(buffer);
}
-# if defined(CPUINFO_ARCH_X86)
-// On x86, get CPU features by cpuid, https://en.wikipedia.org/wiki/CPUID
-
-# if defined(__MINGW64_VERSION_MAJOR) && __MINGW64_VERSION_MAJOR < 5
-void __cpuidex(int CPUInfo[4], int function_id, int subfunction_id) {
- __asm__ __volatile__("cpuid"
- : "=a"(CPUInfo[0]), "=b"(CPUInfo[1]), "=c"(CPUInfo[2]),
- "=d"(CPUInfo[3])
- : "a"(function_id), "c"(subfunction_id));
-}
-
-int64_t _xgetbv(int xcr) {
- int out = 0;
- __asm__ __volatile__("xgetbv" : "=a"(out) : "c"(xcr) : "%edx");
- return out;
-}
-# endif // MINGW
-
-void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
- int register_EAX_id = 1;
- int highest_valid_id = 0;
- int highest_extended_valid_id = 0;
- std::bitset<32> features_ECX;
- std::array<int, 4> cpu_info = {};
-
- // Get highest valid id
- __cpuid(cpu_info.data(), 0);
- highest_valid_id = cpu_info[0];
- // HEX of "GenuineIntel": 47656E75 696E6549 6E74656C
- // HEX of "AuthenticAMD": 41757468 656E7469 63414D44
- if (cpu_info[1] == 0x756e6547 && cpu_info[3] == 0x49656e69 &&
- cpu_info[2] == 0x6c65746e) {
- *vendor = CpuInfo::Vendor::Intel;
- } else if (cpu_info[1] == 0x68747541 && cpu_info[3] == 0x69746e65 &&
- cpu_info[2] == 0x444d4163) {
- *vendor = CpuInfo::Vendor::AMD;
- }
-
- if (highest_valid_id <= register_EAX_id) {
- return;
- }
-
- // EAX=1: Processor Info and Feature Bits
- __cpuidex(cpu_info.data(), register_EAX_id, 0);
- features_ECX = cpu_info[2];
-
- // Get highest extended id
- __cpuid(cpu_info.data(), 0x80000000);
- highest_extended_valid_id = cpu_info[0];
-
- // Retrieve CPU model name
- if (highest_extended_valid_id >= static_cast<int>(0x80000004)) {
- model_name->clear();
- for (int i = 0x80000002; i <= static_cast<int>(0x80000004); ++i) {
- __cpuidex(cpu_info.data(), i, 0);
- *model_name +=
- std::string(reinterpret_cast<char*>(cpu_info.data()),
sizeof(cpu_info));
- }
- }
-
- bool zmm_enabled = false;
- bool ymm_enabled = false;
- if (features_ECX[27]) { // OSXSAVE
- // Query if the OS supports saving YMM and ZMM registers when switching
contexts
- int64_t xcr0 = _xgetbv(0);
- zmm_enabled = (xcr0 & 0xE0) == 0xE0;
- ymm_enabled = (xcr0 & 0b110) == 0b110;
- }
-
- if (features_ECX[9]) *hardware_flags |= CpuInfo::SSSE3;
- if (features_ECX[19]) *hardware_flags |= CpuInfo::SSE4_1;
- if (features_ECX[20]) *hardware_flags |= CpuInfo::SSE4_2;
- if (features_ECX[23]) *hardware_flags |= CpuInfo::POPCNT;
- if (ymm_enabled && features_ECX[28]) *hardware_flags |= CpuInfo::AVX;
-
- // cpuid with EAX=7, ECX=0: Extended Features
- register_EAX_id = 7;
- if (highest_valid_id > register_EAX_id) {
- __cpuidex(cpu_info.data(), register_EAX_id, 0);
- std::bitset<32> features_EBX = cpu_info[1];
-
- if (features_EBX[3]) *hardware_flags |= CpuInfo::BMI1;
- if (features_EBX[8]) *hardware_flags |= CpuInfo::BMI2;
- // Only use AVX/AVX2 if enabled by the OS
- if (ymm_enabled && features_EBX[5]) *hardware_flags |= CpuInfo::AVX2;
- // ARROW-11427: only use AVX512 if enabled by the OS
- if (ymm_enabled && zmm_enabled) {
- if (features_EBX[16]) *hardware_flags |= CpuInfo::AVX512F;
- if (features_EBX[17]) *hardware_flags |= CpuInfo::AVX512DQ;
- if (features_EBX[28]) *hardware_flags |= CpuInfo::AVX512CD;
- if (features_EBX[30]) *hardware_flags |= CpuInfo::AVX512BW;
- if (features_EBX[31]) *hardware_flags |= CpuInfo::AVX512VL;
- }
- }
-}
-# elif defined(CPUINFO_ARCH_ARM)
-// Windows on Arm
-void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
- *hardware_flags |= CpuInfo::ASIMD;
- // TODO: vendor, model_name
-}
-# endif
-
#elif defined(__APPLE__)
//------------------------------ MACOS ------------------------------//
std::optional<int64_t> IntegerSysCtlByName(const char* name) {
@@ -244,8 +183,9 @@ std::optional<int64_t> IntegerSysCtlByName(const char*
name) {
return std::nullopt;
}
-void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
- static_assert(kCacheLevels >= 3, "");
+template <std::size_t N>
+void OsRetrieveCacheSize(std::array<int64_t, N>* cache_sizes) {
+ static_assert(N >= 3);
auto c = IntegerSysCtlByName("hw.l1dcachesize");
if (c.has_value()) {
(*cache_sizes)[0] = *c;
@@ -260,61 +200,21 @@ void OsRetrieveCacheSize(std::array<int64_t,
kCacheLevels>* cache_sizes) {
}
}
-void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
- // hardware_flags
- struct SysCtlCpuFeature {
- const char* name;
- int64_t flag;
- };
- std::vector<SysCtlCpuFeature> features = {
-# if defined(CPUINFO_ARCH_X86)
- {"hw.optional.sse4_2",
- CpuInfo::SSSE3 | CpuInfo::SSE4_1 | CpuInfo::SSE4_2 | CpuInfo::POPCNT},
- {"hw.optional.avx1_0", CpuInfo::AVX},
- {"hw.optional.avx2_0", CpuInfo::AVX2},
- {"hw.optional.bmi1", CpuInfo::BMI1},
- {"hw.optional.bmi2", CpuInfo::BMI2},
- {"hw.optional.avx512f", CpuInfo::AVX512F},
- {"hw.optional.avx512cd", CpuInfo::AVX512CD},
- {"hw.optional.avx512dq", CpuInfo::AVX512DQ},
- {"hw.optional.avx512bw", CpuInfo::AVX512BW},
- {"hw.optional.avx512vl", CpuInfo::AVX512VL},
-# elif defined(CPUINFO_ARCH_ARM)
- // ARM64 (note that this is exposed under Rosetta as well)
- {"hw.optional.neon", CpuInfo::ASIMD},
-# endif
- };
- for (const auto& feature : features) {
- auto v = IntegerSysCtlByName(feature.name);
- if (v.value_or(0)) {
- *hardware_flags |= feature.flag;
- }
- }
-
- // TODO: vendor, model_name
-}
-
-#else
+#elif defined(__linux__)
//------------------------------ LINUX ------------------------------//
-# if defined(CPUINFO_ARCH_ARM)
-# include <asm/hwcap.h>
-# include <sys/auxv.h>
-# include <sys/prctl.h>
-# endif
// Get cache size, return 0 on error
int64_t LinuxGetCacheSize(int level) {
// get cache size by sysconf()
# ifdef _SC_LEVEL1_DCACHE_SIZE
- const int kCacheSizeConf[] = {
+ constexpr auto kCacheSizeConf = std::array<int, 3>{
_SC_LEVEL1_DCACHE_SIZE,
_SC_LEVEL2_CACHE_SIZE,
_SC_LEVEL3_CACHE_SIZE,
};
- static_assert(sizeof(kCacheSizeConf) / sizeof(kCacheSizeConf[0]) ==
kCacheLevels, "");
errno = 0;
+ DCHECK(0 <= level && static_cast<std::size_t>(level) <
kCacheSizeConf.size());
const int64_t cache_size = sysconf(kCacheSizeConf[level]);
if (errno == 0 && cache_size > 0) {
return cache_size;
@@ -322,13 +222,13 @@ int64_t LinuxGetCacheSize(int level) {
# endif
// get cache size from sysfs if sysconf() fails or not supported
- const char* kCacheSizeSysfs[] = {
+ constexpr auto kCacheSizeSysfs = std::array<const char*, 3>{
"/sys/devices/system/cpu/cpu0/cache/index0/size", // l1d (index1 is l1i)
"/sys/devices/system/cpu/cpu0/cache/index2/size", // l2
"/sys/devices/system/cpu/cpu0/cache/index3/size", // l3
};
- static_assert(sizeof(kCacheSizeSysfs) / sizeof(kCacheSizeSysfs[0]) ==
kCacheLevels, "");
+ DCHECK(0 <= level && static_cast<std::size_t>(level) <
kCacheSizeSysfs.size());
std::ifstream cacheinfo(kCacheSizeSysfs[level], std::ios::in);
if (!cacheinfo) {
return 0;
@@ -349,44 +249,10 @@ int64_t LinuxGetCacheSize(int level) {
return static_cast<int64_t>(size);
}
-// Helper function to parse for hardware flags from /proc/cpuinfo
-// values contains a list of space-separated flags. check to see if the flags
we
-// care about are present.
-// Returns a bitmap of flags.
-int64_t LinuxParseCpuFlags(const std::string& values) {
-# if defined(CPUINFO_ARCH_X86) || defined(CPUINFO_ARCH_ARM)
- const struct {
- std::string name;
- int64_t flag;
- } flag_mappings[] = {
-# if defined(CPUINFO_ARCH_X86)
- {"ssse3", CpuInfo::SSSE3}, {"sse4_1", CpuInfo::SSE4_1},
- {"sse4_2", CpuInfo::SSE4_2}, {"popcnt", CpuInfo::POPCNT},
- {"avx", CpuInfo::AVX}, {"avx2", CpuInfo::AVX2},
- {"avx512f", CpuInfo::AVX512F}, {"avx512cd", CpuInfo::AVX512CD},
- {"avx512vl", CpuInfo::AVX512VL}, {"avx512dq", CpuInfo::AVX512DQ},
- {"avx512bw", CpuInfo::AVX512BW}, {"bmi1", CpuInfo::BMI1},
- {"bmi2", CpuInfo::BMI2},
-# elif defined(CPUINFO_ARCH_ARM)
- {"asimd", CpuInfo::ASIMD},
-# endif
- };
- const int64_t num_flags = sizeof(flag_mappings) / sizeof(flag_mappings[0]);
-
- int64_t flags = 0;
- for (int i = 0; i < num_flags; ++i) {
- if (values.find(flag_mappings[i].name) != std::string::npos) {
- flags |= flag_mappings[i].flag;
- }
- }
- return flags;
-# else
- return 0;
-# endif
-}
-
-void OsRetrieveCacheSize(std::array<int64_t, kCacheLevels>* cache_sizes) {
- for (int i = 0; i < kCacheLevels; ++i) {
+template <std::size_t N>
+void OsRetrieveCacheSize(std::array<int64_t, N>* cache_sizes) {
+ static_assert(N <= 3);
+ for (int i = 0; i < static_cast<int>(N); ++i) {
const int64_t cache_size = LinuxGetCacheSize(i);
if (cache_size > 0) {
(*cache_sizes)[i] = cache_size;
@@ -394,59 +260,18 @@ void OsRetrieveCacheSize(std::array<int64_t,
kCacheLevels>* cache_sizes) {
}
}
-// Read from /proc/cpuinfo
-// TODO: vendor, model_name for Arm
-void OsRetrieveCpuInfo(int64_t* hardware_flags, CpuInfo::Vendor* vendor,
- std::string* model_name) {
- std::ifstream cpuinfo("/proc/cpuinfo", std::ios::in);
- while (cpuinfo) {
- std::string line;
- std::getline(cpuinfo, line);
- const size_t colon = line.find(':');
- if (colon != std::string::npos) {
- const std::string name = TrimString(line.substr(0, colon - 1));
- const std::string value = TrimString(line.substr(colon + 1,
std::string::npos));
- if (name.compare("flags") == 0 || name.compare("Features") == 0) {
- *hardware_flags |= LinuxParseCpuFlags(value);
- } else if (name.compare("model name") == 0) {
- *model_name = value;
- } else if (name.compare("vendor_id") == 0) {
- if (value.compare("GenuineIntel") == 0) {
- *vendor = CpuInfo::Vendor::Intel;
- } else if (value.compare("AuthenticAMD") == 0) {
- *vendor = CpuInfo::Vendor::AMD;
- }
- }
- }
- }
+#else
-# if defined(CPUINFO_ARCH_ARM)
- // Detect SVE and vector length via getauxval/prctl (more reliable than
/proc/cpuinfo)
-# ifdef HWCAP_SVE
- const auto hwcap = getauxval(AT_HWCAP);
- if (hwcap & HWCAP_SVE) {
- *hardware_flags |= CpuInfo::SVE;
-# ifdef PR_SVE_GET_VL
- const int vl = prctl(PR_SVE_GET_VL);
- assert(vl >= 0);
- // prctl returns vector length in bytes; mask off status flags
- const int vl_bytes = vl & PR_SVE_VL_LEN_MASK;
- // Running SVE128 on a SVE256 machine is more tricky than the x86
equivalent of
- // running SSE code on an AVX machine and requires to explicitly change the
- // vector length using `prctl` (per thread setting).
- if (vl_bytes == 16) *hardware_flags |= CpuInfo::SVE128; // 128 bits
- if (vl_bytes == 32) *hardware_flags |= CpuInfo::SVE256; // 256 bits
- if (vl_bytes == 64) *hardware_flags |= CpuInfo::SVE512; // 512 bits
-# endif // PR_SVE_GET_VL
- }
-# endif // HWCAP_SVE
-# endif // CPUINFO_ARCH_ARM
+template <std::size_t N>
+void OsRetrieveCacheSize(std::array<int64_t, N>* /* cache_sizes */) {
+ // NoOp, will be defaulted by CpuInfo::CacheSize
}
-#endif // WINDOWS, MACOS, LINUX
+
+#endif // WINDOWS, MACOS, LINUX
//============================== Arch Dependent
==============================//
-#if defined(CPUINFO_ARCH_X86)
+#if XSIMD_TARGET_X86
//------------------------------ X86_64 ------------------------------//
bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
enum {
@@ -498,7 +323,7 @@ void ArchVerifyCpuRequirements(const CpuInfo* ci) {
# endif
}
-#elif defined(CPUINFO_ARCH_ARM)
+#elif XSIMD_TARGET_ARM64
//------------------------------ AARCH64 ------------------------------//
bool ArchParseUserSimdLevel(const std::string& simd_level, int64_t*
hardware_flags) {
enum {
@@ -550,95 +375,64 @@ void ArchVerifyCpuRequirements(const CpuInfo* ci) {}
} // namespace
-struct CpuInfo::Impl {
- int64_t hardware_flags = 0;
- int num_cores = 0;
- int64_t original_hardware_flags = 0;
- Vendor vendor = Vendor::Unknown;
- std::string model_name = "Unknown";
- std::array<int64_t, kCacheLevels> cache_sizes{};
-
- Impl() {
- OsRetrieveCacheSize(&cache_sizes);
- OsRetrieveCpuInfo(&hardware_flags, &vendor, &model_name);
- original_hardware_flags = hardware_flags;
- num_cores =
std::max(static_cast<int>(std::thread::hardware_concurrency()), 1);
-
- // parse user simd level
- auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
- if (!maybe_env_var.ok()) {
- return;
- }
- std::string s = *std::move(maybe_env_var);
- std::transform(s.begin(), s.end(), s.begin(),
- [](unsigned char c) { return std::toupper(c); });
- if (!ArchParseUserSimdLevel(s, &hardware_flags)) {
- ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
- }
- }
+CpuInfo::CpuInfo() {
+ OsRetrieveCacheSize(&cache_sizes_);
+ OsRetrieveCpuInfo(&hardware_flags_, &vendor_);
+ original_hardware_flags_ = hardware_flags_;
+ num_cores_ = std::max(static_cast<int>(std::thread::hardware_concurrency()),
1);
- void EnableFeature(int64_t flag, bool enable) {
- if (!enable) {
- hardware_flags &= ~flag;
- } else {
- // Can't turn something on that can't be supported
- DCHECK_EQ((~original_hardware_flags) & flag, 0);
- hardware_flags |= (flag & original_hardware_flags);
- }
+ // parse user simd level
+ auto maybe_env_var = GetEnvVar("ARROW_USER_SIMD_LEVEL");
+ if (!maybe_env_var.ok()) {
+ return;
}
-};
-
-CpuInfo::~CpuInfo() = default;
-
-CpuInfo::CpuInfo() : impl_(new Impl) {}
+ std::string s = *std::move(maybe_env_var);
+ std::transform(s.begin(), s.end(), s.begin(),
+ [](unsigned char c) { return std::toupper(c); });
+ if (!ArchParseUserSimdLevel(s, &hardware_flags_)) {
+ ARROW_LOG(WARNING) << "Invalid value for ARROW_USER_SIMD_LEVEL: " << s;
+ }
+}
const CpuInfo* CpuInfo::GetInstance() {
- static CpuInfo cpu_info;
+ static const CpuInfo cpu_info;
return &cpu_info;
}
-int64_t CpuInfo::hardware_flags() const { return impl_->hardware_flags; }
-
-int CpuInfo::num_cores() const { return impl_->num_cores <= 0 ? 1 :
impl_->num_cores; }
-
-CpuInfo::Vendor CpuInfo::vendor() const { return impl_->vendor; }
-
-const std::string& CpuInfo::model_name() const { return impl_->model_name; }
-
int64_t CpuInfo::CacheSize(CacheLevel level) const {
- constexpr int64_t kDefaultCacheSizes[] = {
+ constexpr auto kDefaultCacheSizes = std::array<int64_t, 3>{
32 * 1024, // Level 1: 32K
256 * 1024, // Level 2: 256K
3072 * 1024, // Level 3: 3M
};
- static_assert(
- sizeof(kDefaultCacheSizes) / sizeof(kDefaultCacheSizes[0]) ==
kCacheLevels, "");
+ static_assert(kDefaultCacheSizes.size() == kCacheLevels);
- static_assert(static_cast<int>(CacheLevel::L1) == 0, "");
+ static_assert(static_cast<int>(CacheLevel::L1) == 0);
const int i = static_cast<int>(level);
- if (impl_->cache_sizes[i] > 0) return impl_->cache_sizes[i];
+ if (cache_sizes_[i] > 0) return cache_sizes_[i];
if (i == 0) return kDefaultCacheSizes[0];
// l3 may be not available, return maximum of l2 or default size
- return std::max(kDefaultCacheSizes[i], impl_->cache_sizes[i - 1]);
+ return std::max(kDefaultCacheSizes[i], cache_sizes_[i - 1]);
}
bool CpuInfo::IsSupported(int64_t flags) const {
- return (impl_->hardware_flags & flags) == flags;
+ return (hardware_flags_ & flags) == flags;
}
bool CpuInfo::IsDetected(int64_t flags) const {
- return (impl_->original_hardware_flags & flags) == flags;
+ return (original_hardware_flags_ & flags) == flags;
}
void CpuInfo::VerifyCpuRequirements() const { return
ArchVerifyCpuRequirements(this); }
void CpuInfo::EnableFeature(int64_t flag, bool enable) {
- impl_->EnableFeature(flag, enable);
+ if (!enable) {
+ hardware_flags_ &= ~flag;
+ } else {
+ // Can't turn something on that can't be supported
+ DCHECK_EQ((~original_hardware_flags_) & flag, 0);
+ hardware_flags_ |= (flag & original_hardware_flags_);
+ }
}
-} // namespace internal
-} // namespace arrow
-
-#undef CPUINFO_ARCH_X86
-#undef CPUINFO_ARCH_ARM
-#undef CPUINFO_ARCH_PPC
+} // namespace arrow::internal
diff --git a/cpp/src/arrow/util/cpu_info.h b/cpp/src/arrow/util/cpu_info.h
index de0ef13cc5..7e4ca4c7f1 100644
--- a/cpp/src/arrow/util/cpu_info.h
+++ b/cpp/src/arrow/util/cpu_info.h
@@ -20,15 +20,13 @@
#pragma once
+#include <array>
#include <cstdint>
-#include <memory>
-#include <string>
+#include <string_view>
-#include "arrow/util/macros.h"
#include "arrow/util/visibility.h"
-namespace arrow {
-namespace internal {
+namespace arrow::internal {
/// CpuInfo is an interface to query for cpu information at runtime. The
caller can
/// ask for the sizes of the caches and what hardware features are supported.
@@ -36,8 +34,6 @@ namespace internal {
/// /sys/devices)
class ARROW_EXPORT CpuInfo {
public:
- ~CpuInfo();
-
/// x86 features
static constexpr int64_t SSSE3 = (1LL << 0);
static constexpr int64_t SSE4_1 = (1LL << 1);
@@ -70,16 +66,19 @@ class ARROW_EXPORT CpuInfo {
static const CpuInfo* GetInstance();
/// Returns all the flags for this cpu
- int64_t hardware_flags() const;
+ int64_t hardware_flags() const { return hardware_flags_; }
/// Returns the number of cores (including hyper-threaded) on this machine.
- int num_cores() const;
+ int num_cores() const { return num_cores_ <= 0 ? 1 : num_cores_; }
/// Returns the vendor of the cpu.
- Vendor vendor() const;
+ Vendor vendor() const { return vendor_; }
/// Returns the model name of the cpu (e.g. Intel i7-2600)
- const std::string& model_name() const;
+ std::string_view model_name() const {
+ // Unavailable in xsimd at the time of migration and previously unused.
+ return "Unknown";
+ }
/// Returns the size of the cache in KB at this cache level
int64_t CacheSize(CacheLevel level) const;
@@ -108,11 +107,15 @@ class ARROW_EXPORT CpuInfo {
}
private:
- CpuInfo();
+ static constexpr int kCacheLevels =
static_cast<int>(CpuInfo::CacheLevel::Last) + 1;
- struct Impl;
- std::unique_ptr<Impl> impl_;
+ std::array<int64_t, kCacheLevels> cache_sizes_ = {};
+ int64_t original_hardware_flags_ = 0;
+ int64_t hardware_flags_ = 0;
+ int num_cores_ = 0;
+ Vendor vendor_ = Vendor::Unknown;
+
+ CpuInfo();
};
-} // namespace internal
-} // namespace arrow
+} // namespace arrow::internal
diff --git a/cpp/src/gandiva/CMakeLists.txt b/cpp/src/gandiva/CMakeLists.txt
index 31a86d5da9..aabe4ec8bf 100644
--- a/cpp/src/gandiva/CMakeLists.txt
+++ b/cpp/src/gandiva/CMakeLists.txt
@@ -115,10 +115,8 @@ set(GANDIVA_STATIC_LINK_LIBS
LLVM::LLVM_LIBS
${GANDIVA_OPENSSL_LIBS}
Boost::headers)
-if(ARROW_USE_XSIMD)
- list(APPEND GANDIVA_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD})
- list(APPEND GANDIVA_STATIC_LINK_LIBS ${ARROW_XSIMD})
-endif()
+list(APPEND GANDIVA_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD})
+list(APPEND GANDIVA_STATIC_LINK_LIBS ${ARROW_XSIMD})
if(ARROW_WITH_RE2)
list(APPEND GANDIVA_SHARED_PRIVATE_LINK_LIBS re2::re2)
list(APPEND GANDIVA_STATIC_LINK_LIBS re2::re2)
diff --git a/cpp/src/parquet/CMakeLists.txt b/cpp/src/parquet/CMakeLists.txt
index 07cf9f9c50..6839f2ff4c 100644
--- a/cpp/src/parquet/CMakeLists.txt
+++ b/cpp/src/parquet/CMakeLists.txt
@@ -228,11 +228,9 @@ endif()
set(PARQUET_SHARED_LINK_LIBS)
set(PARQUET_SHARED_PRIVATE_LINK_LIBS)
-if(ARROW_USE_XSIMD)
- list(APPEND PARQUET_SHARED_LINK_LIBS ${ARROW_XSIMD})
- list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD})
- list(APPEND PARQUET_STATIC_LINK_LIBS ${ARROW_XSIMD})
-endif()
+list(APPEND PARQUET_SHARED_LINK_LIBS ${ARROW_XSIMD})
+list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS ${ARROW_XSIMD})
+list(APPEND PARQUET_STATIC_LINK_LIBS ${ARROW_XSIMD})
if(PARQUET_REQUIRE_ENCRYPTION)
list(APPEND PARQUET_SHARED_PRIVATE_LINK_LIBS ${ARROW_OPENSSL_LIBS})