This is an automated email from the ASF dual-hosted git repository.

apitrou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/main by this push:
     new 2691103628 GH-48591: [C++] Remove some bit utils from bit_utils.h and 
replace them with C++ 20 built in functions (#49298)
2691103628 is described below

commit 269110362820185db3bf7aa78ba176d71cb00e75
Author: Paweł Biegun <[email protected]>
AuthorDate: Wed Feb 18 00:54:17 2026 -0800

    GH-48591: [C++] Remove some bit utils from bit_utils.h and replace them 
with C++ 20 built in functions (#49298)
    
    ### Rationale for this change
    Before C++ 20 there was no built in implementation for many common bit 
operations utilities included in the stdlib so they were implemented in 
bit_utils.h. Now that they are included in the stdlib they should be removed 
from bit_utils to decrease the amount of code that needs to be maintained as 
described in #48591
    
    ### What changes are included in this PR?
    IsPowerOf2, PopCount, CountLeadingZeros, CountTrailingZeros, 
NumRequiredBits are removed from bit_utils and replaced with their equivalents 
from bit.h i.e. has_single_bit, popcount, countl_zero, countr_zero and 
bit_width.
    
    ### Are these changes tested?
    No new code is introduced and the stdlib implementation maintains parity 
with the replaced functions so no new unit tests are necessary.
    
    ### Are there any user-facing changes?
    No
    * GitHub Issue: #48591
    
    Lead-authored-by: Paweł Biegun <[email protected]>
    Co-authored-by: Antoine Pitrou <[email protected]>
    Signed-off-by: Antoine Pitrou <[email protected]>
---
 cpp/src/arrow/acero/aggregate_benchmark.cc         |   3 +-
 cpp/src/arrow/acero/bloom_filter_test.cc           |   5 +-
 cpp/src/arrow/acero/swiss_join.cc                  |   5 +-
 .../compute/kernels/base_arithmetic_internal.h     |   4 +-
 cpp/src/arrow/compute/key_hash_internal.cc         |   5 +-
 cpp/src/arrow/compute/key_map_internal.cc          |  18 +--
 cpp/src/arrow/compute/row/row_internal.cc          |  10 +-
 cpp/src/arrow/compute/row/row_internal.h           |   8 +-
 cpp/src/arrow/compute/util.cc                      |   7 +-
 cpp/src/arrow/compute/util_avx2.cc                 |   5 +-
 cpp/src/arrow/testing/uniform_real.h               |   5 +-
 cpp/src/arrow/util/align_util.h                    |   3 +-
 cpp/src/arrow/util/basic_decimal.cc                |   9 +-
 cpp/src/arrow/util/bit_block_counter.h             |  28 ++--
 cpp/src/arrow/util/bit_run_reader.h                |  19 +--
 cpp/src/arrow/util/bit_util.h                      | 144 +--------------------
 cpp/src/arrow/util/bit_util_test.cc                |  69 ----------
 cpp/src/arrow/util/bitmap_ops.cc                   |  10 +-
 cpp/src/arrow/util/bitmap_reader_benchmark.cc      |   7 +-
 cpp/src/arrow/util/bitmap_writer.h                 |   3 +-
 cpp/src/arrow/util/rle_encoding_test.cc            |   3 +-
 cpp/src/gandiva/selection_vector.cc                |   3 +-
 cpp/src/parquet/chunker_internal.cc                |   4 +-
 cpp/src/parquet/encoder.cc                         |   5 +-
 cpp/src/parquet/level_conversion_inc.h             |   9 +-
 25 files changed, 101 insertions(+), 290 deletions(-)

diff --git a/cpp/src/arrow/acero/aggregate_benchmark.cc 
b/cpp/src/arrow/acero/aggregate_benchmark.cc
index 2e9cccd80d..171d19830a 100644
--- a/cpp/src/arrow/acero/aggregate_benchmark.cc
+++ b/cpp/src/arrow/acero/aggregate_benchmark.cc
@@ -17,6 +17,7 @@
 
 #include "benchmark/benchmark.h"
 
+#include <bit>
 #include <cassert>
 #include <cmath>
 #include <iostream>
@@ -269,7 +270,7 @@ struct SumBitmapVectorizeUnroll : public Summer<T> {
         local.total += SUM_SHIFT(5);
         local.total += SUM_SHIFT(6);
         local.total += SUM_SHIFT(7);
-        local.valid_count += bit_util::kBytePopcount[valid_byte];
+        local.valid_count += std::popcount(valid_byte);
       } else {
         // No nulls
         local.total += values[i + 0] + values[i + 1] + values[i + 2] + 
values[i + 3] +
diff --git a/cpp/src/arrow/acero/bloom_filter_test.cc 
b/cpp/src/arrow/acero/bloom_filter_test.cc
index 30cafd120c..62071cfcf1 100644
--- a/cpp/src/arrow/acero/bloom_filter_test.cc
+++ b/cpp/src/arrow/acero/bloom_filter_test.cc
@@ -18,6 +18,7 @@
 #include <gmock/gmock-matchers.h>
 
 #include <algorithm>
+#include <bit>
 #include <chrono>
 #include <condition_variable>
 #include <thread>
@@ -407,14 +408,14 @@ void TestBloomLarge(BloomFilterBuildStrategy strategy, 
int64_t num_build,
     uint64_t num_negatives = 0ULL;
     for (int iword = 0; iword < next_batch_size / 64; ++iword) {
       uint64_t word = reinterpret_cast<const 
uint64_t*>(result_bit_vector.data())[iword];
-      num_negatives += ARROW_POPCOUNT64(~word);
+      num_negatives += std::popcount(~word);
     }
     if (next_batch_size % 64 > 0) {
       uint64_t word = reinterpret_cast<const uint64_t*>(
           result_bit_vector.data())[next_batch_size / 64];
       uint64_t mask = (1ULL << (next_batch_size % 64)) - 1;
       word |= ~mask;
-      num_negatives += ARROW_POPCOUNT64(~word);
+      num_negatives += std::popcount(~word);
     }
     if (i < num_build) {
       num_negatives_build += num_negatives;
diff --git a/cpp/src/arrow/acero/swiss_join.cc 
b/cpp/src/arrow/acero/swiss_join.cc
index 97632e0ca0..9b2ebc33e1 100644
--- a/cpp/src/arrow/acero/swiss_join.cc
+++ b/cpp/src/arrow/acero/swiss_join.cc
@@ -17,6 +17,7 @@
 
 #include <sys/stat.h>
 #include <algorithm>  // std::upper_bound
+#include <bit>
 #include <cstdio>
 #include <cstdlib>
 #include <mutex>
@@ -666,7 +667,7 @@ void SwissTableMerge::MergePartition(SwissTable* target, 
const SwissTable* sourc
     // For each non-empty source slot...
     constexpr uint64_t kHighBitOfEachByte = 0x8080808080808080ULL;
     int num_full_slots = SwissTable::kSlotsPerBlock -
-                         static_cast<int>(ARROW_POPCOUNT64(block & 
kHighBitOfEachByte));
+                         static_cast<int>(std::popcount(block & 
kHighBitOfEachByte));
     for (int local_slot_id = 0; local_slot_id < num_full_slots; 
++local_slot_id) {
       // Read group id and hash for this slot.
       //
@@ -722,7 +723,7 @@ inline bool SwissTableMerge::InsertNewGroup(SwissTable* 
target, uint32_t group_i
     return false;
   }
   int local_slot_id = SwissTable::kSlotsPerBlock -
-                      static_cast<int>(ARROW_POPCOUNT64(block & 
kHighBitOfEachByte));
+                      static_cast<int>(std::popcount(block & 
kHighBitOfEachByte));
   uint32_t global_slot_id = SwissTable::global_slot_id(block_id, 
local_slot_id);
   target->insert_into_empty_slot(global_slot_id, hash, group_id);
   return true;
diff --git a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h 
b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
index 960ba59892..b4840061ae 100644
--- a/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
+++ b/cpp/src/arrow/compute/kernels/base_arithmetic_internal.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <bit>
 #include <limits>
 #include "arrow/compute/api_scalar.h"
 #include "arrow/compute/kernels/common_internal.h"
@@ -594,8 +595,7 @@ struct PowerChecked {
     }
     // left to right O(logn) power with overflow checks
     bool overflow = false;
-    uint64_t bitmask =
-        1ULL << (63 - bit_util::CountLeadingZeros(static_cast<uint64_t>(exp)));
+    uint64_t bitmask = 1ULL << (63 - 
std::countl_zero(static_cast<uint64_t>(exp)));
     T pow = 1;
     while (bitmask) {
       overflow |= MultiplyWithOverflow(pow, pow, &pow);
diff --git a/cpp/src/arrow/compute/key_hash_internal.cc 
b/cpp/src/arrow/compute/key_hash_internal.cc
index a0002efb3f..4608a742e1 100644
--- a/cpp/src/arrow/compute/key_hash_internal.cc
+++ b/cpp/src/arrow/compute/key_hash_internal.cc
@@ -20,6 +20,7 @@
 #include <memory.h>
 
 #include <algorithm>
+#include <bit>
 #include <cstdint>
 
 #include "arrow/compute/light_array_internal.h"
@@ -357,7 +358,7 @@ void Hashing32::HashInt(bool combine_hashes, uint32_t 
num_keys, uint64_t key_len
 void Hashing32::HashFixed(int64_t hardware_flags, bool combine_hashes, 
uint32_t num_keys,
                           uint64_t key_length, const uint8_t* keys, uint32_t* 
hashes,
                           uint32_t* temp_hashes_for_combine) {
-  if (ARROW_POPCOUNT64(key_length) == 1 && key_length <= sizeof(uint64_t)) {
+  if (std::popcount(key_length) == 1 && key_length <= sizeof(uint64_t)) {
     HashInt(combine_hashes, num_keys, key_length, keys, hashes);
     return;
   }
@@ -809,7 +810,7 @@ void Hashing64::HashInt(bool combine_hashes, uint32_t 
num_keys, uint64_t key_len
 
 void Hashing64::HashFixed(bool combine_hashes, uint32_t num_keys, uint64_t 
key_length,
                           const uint8_t* keys, uint64_t* hashes) {
-  if (ARROW_POPCOUNT64(key_length) == 1 && key_length <= sizeof(uint64_t)) {
+  if (std::popcount(key_length) == 1 && key_length <= sizeof(uint64_t)) {
     HashInt(combine_hashes, num_keys, key_length, keys, hashes);
     return;
   }
diff --git a/cpp/src/arrow/compute/key_map_internal.cc 
b/cpp/src/arrow/compute/key_map_internal.cc
index 4a2405e754..353449cf16 100644
--- a/cpp/src/arrow/compute/key_map_internal.cc
+++ b/cpp/src/arrow/compute/key_map_internal.cc
@@ -18,6 +18,7 @@
 #include "arrow/compute/key_map_internal.h"
 
 #include <algorithm>
+#include <bit>
 #include <cstdint>
 
 #include "arrow/util/bit_util.h"
@@ -27,7 +28,6 @@
 
 namespace arrow {
 
-using bit_util::CountLeadingZeros;
 using internal::CpuInfo;
 
 namespace compute {
@@ -91,7 +91,7 @@ inline void SwissTable::search_block(uint64_t block, int 
stamp, int start_slot,
   // Now if we or with the highest bits of the block and scan zero bits in 
reverse, we get
   // 8x slot index that we were looking for. This formula works in all three 
cases a), b)
   // and c).
-  *out_slot = static_cast<int>(CountLeadingZeros(matches | block_high_bits) >> 
3);
+  *out_slot = static_cast<int>(std::countl_zero(matches | block_high_bits) >> 
3);
 }
 
 template <typename T, bool use_selection>
@@ -204,8 +204,8 @@ void SwissTable::init_slot_ids_for_new_keys(uint32_t 
num_ids, const uint16_t* id
   int num_block_bytes = 
num_block_bytes_from_num_groupid_bits(num_groupid_bits);
   if (log_blocks_ == 0) {
     uint64_t block = *reinterpret_cast<const 
uint64_t*>(blocks_->mutable_data());
-    uint32_t empty_slot = static_cast<uint32_t>(
-        kSlotsPerBlock - ARROW_POPCOUNT64(block & kHighBitOfEachByte));
+    uint32_t empty_slot =
+        static_cast<uint32_t>(kSlotsPerBlock - std::popcount(block & 
kHighBitOfEachByte));
     for (uint32_t i = 0; i < num_ids; ++i) {
       int id = ids[i];
       slot_ids[id] = empty_slot;
@@ -224,7 +224,7 @@ void SwissTable::init_slot_ids_for_new_keys(uint32_t 
num_ids, const uint16_t* id
         }
         iblock = (iblock + 1) & ((1 << log_blocks_) - 1);
       }
-      uint32_t empty_slot = static_cast<int>(kSlotsPerBlock - 
ARROW_POPCOUNT64(block));
+      uint32_t empty_slot = static_cast<int>(kSlotsPerBlock - 
std::popcount(block));
       slot_ids[id] = global_slot_id(iblock, empty_slot);
     }
   }
@@ -684,7 +684,7 @@ Status SwissTable::grow_double() {
         mutable_block_data(blocks_new->mutable_data(), 2 * i, 
block_size_after);
     uint64_t block = *reinterpret_cast<const uint64_t*>(block_base);
 
-    uint32_t full_slots = CountLeadingZeros(block & kHighBitOfEachByte) >> 3;
+    uint32_t full_slots = std::countl_zero(block & kHighBitOfEachByte) >> 3;
     uint32_t full_slots_new[2];
     full_slots_new[0] = full_slots_new[1] = 0;
     util::SafeStore(double_block_base_new, kHighBitOfEachByte);
@@ -722,7 +722,7 @@ Status SwissTable::grow_double() {
     // How many full slots in this block
     const uint8_t* block_base = block_data(i, block_size_before);
     uint64_t block = util::SafeLoadAs<uint64_t>(block_base);
-    uint32_t full_slots = CountLeadingZeros(block & kHighBitOfEachByte) >> 3;
+    uint32_t full_slots = std::countl_zero(block & kHighBitOfEachByte) >> 3;
 
     for (uint32_t j = 0; j < full_slots; ++j) {
       uint32_t slot_id = global_slot_id(i, j);
@@ -741,13 +741,13 @@ Status SwissTable::grow_double() {
           mutable_block_data(blocks_new->mutable_data(), block_id_new, 
block_size_after);
       uint64_t block_new = util::SafeLoadAs<uint64_t>(block_base_new);
       int full_slots_new =
-          static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) 
>> 3);
+          static_cast<int>(std::countl_zero(block_new & kHighBitOfEachByte) >> 
3);
       while (full_slots_new == kSlotsPerBlock) {
         block_id_new = (block_id_new + 1) & ((1 << log_blocks_after) - 1);
         block_base_new = blocks_new->mutable_data() + block_id_new * 
block_size_after;
         block_new = util::SafeLoadAs<uint64_t>(block_base_new);
         full_slots_new =
-            static_cast<int>(CountLeadingZeros(block_new & kHighBitOfEachByte) 
>> 3);
+            static_cast<int>(std::countl_zero(block_new & kHighBitOfEachByte) 
>> 3);
       }
 
       hashes_new[block_id_new * kSlotsPerBlock + full_slots_new] = hash;
diff --git a/cpp/src/arrow/compute/row/row_internal.cc 
b/cpp/src/arrow/compute/row/row_internal.cc
index 6af5458ea9..39d0bb0c63 100644
--- a/cpp/src/arrow/compute/row/row_internal.cc
+++ b/cpp/src/arrow/compute/row/row_internal.cc
@@ -17,6 +17,8 @@
 
 #include "arrow/compute/row/row_internal.h"
 
+#include <bit>
+
 #include "arrow/compute/util.h"
 #include "arrow/util/logging_internal.h"
 
@@ -89,9 +91,9 @@ void RowTableMetadata::FromColumnMetadataVector(
   std::sort(
       column_order.begin(), column_order.end(), [&cols](uint32_t left, 
uint32_t right) {
         bool is_left_pow2 =
-            !cols[left].is_fixed_length || 
ARROW_POPCOUNT64(cols[left].fixed_length) <= 1;
-        bool is_right_pow2 = !cols[right].is_fixed_length ||
-                             ARROW_POPCOUNT64(cols[right].fixed_length) <= 1;
+            !cols[left].is_fixed_length || 
std::popcount(cols[left].fixed_length) <= 1;
+        bool is_right_pow2 =
+            !cols[right].is_fixed_length || 
std::popcount(cols[right].fixed_length) <= 1;
         bool is_left_fixedlen = cols[left].is_fixed_length;
         bool is_right_fixedlen = cols[right].is_fixed_length;
         uint32_t width_left =
@@ -127,7 +129,7 @@ void RowTableMetadata::FromColumnMetadataVector(
   for (uint32_t i = 0; i < num_cols; ++i) {
     const KeyColumnMetadata& col = cols[column_order[i]];
     if (col.is_fixed_length && col.fixed_length != 0 &&
-        ARROW_POPCOUNT64(col.fixed_length) != 1) {
+        std::popcount(col.fixed_length) != 1) {
       offset_within_row += RowTableMetadata::padding_for_alignment_within_row(
           offset_within_row, string_alignment, col);
     }
diff --git a/cpp/src/arrow/compute/row/row_internal.h 
b/cpp/src/arrow/compute/row/row_internal.h
index 219fcbc51f..1c1ed5ca7c 100644
--- a/cpp/src/arrow/compute/row/row_internal.h
+++ b/cpp/src/arrow/compute/row/row_internal.h
@@ -16,6 +16,7 @@
 // under the License.
 #pragma once
 
+#include <bit>
 #include <cstdint>
 #include <vector>
 
@@ -85,7 +86,7 @@ struct ARROW_COMPUTE_EXPORT RowTableMetadata {
   /// Alignment must be a power of 2.
   static inline uint32_t padding_for_alignment_within_row(uint32_t offset,
                                                           int 
required_alignment) {
-    ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
+    ARROW_DCHECK(std::popcount(static_cast<uint64_t>(required_alignment)) == 
1);
     return static_cast<uint32_t>((-static_cast<int32_t>(offset)) &
                                  (required_alignment - 1));
   }
@@ -94,8 +95,7 @@ struct ARROW_COMPUTE_EXPORT RowTableMetadata {
   /// choosing required alignment based on the data type of that column.
   static inline uint32_t padding_for_alignment_within_row(
       uint32_t offset, int string_alignment, const KeyColumnMetadata& 
col_metadata) {
-    if (!col_metadata.is_fixed_length ||
-        ARROW_POPCOUNT64(col_metadata.fixed_length) <= 1) {
+    if (!col_metadata.is_fixed_length || 
std::popcount(col_metadata.fixed_length) <= 1) {
       return 0;
     } else {
       return padding_for_alignment_within_row(offset, string_alignment);
@@ -106,7 +106,7 @@ struct ARROW_COMPUTE_EXPORT RowTableMetadata {
   /// Alignment must be a power of 2.
   static inline offset_type padding_for_alignment_row(offset_type row_offset,
                                                       int required_alignment) {
-    ARROW_DCHECK(ARROW_POPCOUNT64(required_alignment) == 1);
+    ARROW_DCHECK(std::popcount(static_cast<uint64_t>(required_alignment)) == 
1);
     return (-row_offset) & (required_alignment - 1);
   }
 
diff --git a/cpp/src/arrow/compute/util.cc b/cpp/src/arrow/compute/util.cc
index b90b3a6405..28bbfb7072 100644
--- a/cpp/src/arrow/compute/util.cc
+++ b/cpp/src/arrow/compute/util.cc
@@ -17,12 +17,13 @@
 
 #include "arrow/compute/util.h"
 
+#include <bit>
+
 #include "arrow/util/logging.h"
 #include "arrow/util/ubsan.h"
 
 namespace arrow {
 
-using bit_util::CountTrailingZeros;
 using internal::CpuInfo;
 
 namespace util {
@@ -65,7 +66,7 @@ inline void bits_to_indexes_helper(uint64_t word, uint16_t 
base_index, int* num_
                                    uint16_t* indexes) {
   int n = *num_indexes;
   while (word) {
-    indexes[n++] = base_index + 
static_cast<uint16_t>(CountTrailingZeros(word));
+    indexes[n++] = base_index + static_cast<uint16_t>(std::countr_zero(word));
     word &= word - 1;
   }
   *num_indexes = n;
@@ -75,7 +76,7 @@ inline void bits_filter_indexes_helper(uint64_t word, const 
uint16_t* input_inde
                                        int* num_indexes, uint16_t* indexes) {
   int n = *num_indexes;
   while (word) {
-    indexes[n++] = input_indexes[CountTrailingZeros(word)];
+    indexes[n++] = input_indexes[std::countr_zero(word)];
     word &= word - 1;
   }
   *num_indexes = n;
diff --git a/cpp/src/arrow/compute/util_avx2.cc 
b/cpp/src/arrow/compute/util_avx2.cc
index a554e0463f..9e1b7e4c0f 100644
--- a/cpp/src/arrow/compute/util_avx2.cc
+++ b/cpp/src/arrow/compute/util_avx2.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <bit>
 #include <cstring>
 
 #include "arrow/compute/util.h"
@@ -54,7 +55,7 @@ void bits_to_indexes_imp_avx2(const int num_bits, const 
uint8_t* bits, int* num_
           _pext_u64(mask, _pdep_u64(word, kEachByteIs1) * 0xff) + base;
       *reinterpret_cast<uint64_t*>(byte_indexes + num_indexes_loop) = 
byte_indexes_next;
       base += incr;
-      num_indexes_loop += static_cast<int>(arrow::bit_util::PopCount(word & 
0xff));
+      num_indexes_loop += static_cast<int>(std::popcount(word & 0xff));
       word >>= 8;
     }
     // Unpack indexes to 16-bits and either add the base of i * 64 or shuffle 
input
@@ -144,7 +145,7 @@ void bits_filter_indexes_imp_avx2(const int num_bits, const 
uint8_t* bits,
                                                       
kByteSequence_0_8_1_9_2_10_3_11,
                                                       
kByteSequence_4_12_5_13_6_14_7_15));
       _mm256_storeu_si256((__m256i*)(indexes + num_indexes), output);
-      num_indexes += static_cast<int>(arrow::bit_util::PopCount(word & 
0xffff));
+      num_indexes += static_cast<int>(std::popcount(word & 0xffff));
       word >>= 16;
       ++loop_id;
     }
diff --git a/cpp/src/arrow/testing/uniform_real.h 
b/cpp/src/arrow/testing/uniform_real.h
index 8aa04a8328..4ad106188f 100644
--- a/cpp/src/arrow/testing/uniform_real.h
+++ b/cpp/src/arrow/testing/uniform_real.h
@@ -25,6 +25,7 @@
 
 #pragma once
 
+#include <bit>
 #include <limits>
 
 #include <arrow/util/bit_util.h>
@@ -39,8 +40,8 @@ namespace detail {
 template <typename RealType, typename Rng>
 RealType generate_canonical(Rng& rng) {
   const size_t b = std::numeric_limits<RealType>::digits;
-  const size_t log2R = 63 - ::arrow::bit_util::CountLeadingZeros(
-                                static_cast<uint64_t>(Rng::max() - Rng::min()) 
+ 1);
+  const size_t log2R =
+      63 - std::countl_zero(static_cast<uint64_t>(Rng::max() - Rng::min()) + 
1);
   const size_t k = b / log2R + (b % log2R != 0) + (b == 0);
   const RealType r = static_cast<RealType>(Rng::max() - Rng::min()) + 1;
   RealType base = r;
diff --git a/cpp/src/arrow/util/align_util.h b/cpp/src/arrow/util/align_util.h
index 71920e49f4..64eb1f7ba6 100644
--- a/cpp/src/arrow/util/align_util.h
+++ b/cpp/src/arrow/util/align_util.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <algorithm>
+#include <bit>
 
 #include "arrow/memory_pool.h"
 #include "arrow/type_fwd.h"
@@ -43,7 +44,7 @@ struct BitmapWordAlignParams {
 template <uint64_t ALIGN_IN_BYTES>
 inline BitmapWordAlignParams BitmapWordAlign(const uint8_t* data, int64_t 
bit_offset,
                                              int64_t length) {
-  static_assert(bit_util::IsPowerOf2(ALIGN_IN_BYTES),
+  static_assert(std::has_single_bit(ALIGN_IN_BYTES),
                 "ALIGN_IN_BYTES should be a positive power of two");
   constexpr uint64_t ALIGN_IN_BITS = ALIGN_IN_BYTES * 8;
 
diff --git a/cpp/src/arrow/util/basic_decimal.cc 
b/cpp/src/arrow/util/basic_decimal.cc
index fc69bcf6f8..eddb1aae7b 100644
--- a/cpp/src/arrow/util/basic_decimal.cc
+++ b/cpp/src/arrow/util/basic_decimal.cc
@@ -19,6 +19,7 @@
 
 #include <algorithm>
 #include <array>
+#include <bit>
 #include <climits>
 #include <cstdint>
 #include <cstdlib>
@@ -388,7 +389,7 @@ BasicDecimal64 operator%(const BasicDecimal64& left, const 
BasicDecimal64& right
 
 template <typename BaseType>
 int32_t SmallBasicDecimal<BaseType>::CountLeadingBinaryZeros() const {
-  return 
bit_util::CountLeadingZeros(static_cast<std::make_unsigned_t<BaseType>>(value_));
+  return std::countl_zero(static_cast<std::make_unsigned_t<BaseType>>(value_));
 }
 
 // same as kDecimal128PowersOfTen[38] - 1
@@ -892,7 +893,7 @@ static inline DecimalStatus DecimalDivide(const 
DecimalClass& dividend,
   // Normalize by shifting both by a multiple of 2 so that
   // the digit guessing is better. The requirement is that
   // divisor_array[0] is greater than 2**31.
-  int64_t normalize_bits = bit_util::CountLeadingZeros(divisor_array[0]);
+  int64_t normalize_bits = std::countl_zero(divisor_array[0]);
   ShiftArrayLeft(divisor_array, divisor_length, normalize_bits);
   ShiftArrayLeft(dividend_array, dividend_length, normalize_bits);
 
@@ -1155,9 +1156,9 @@ int32_t BasicDecimal128::CountLeadingBinaryZeros() const {
   DCHECK_GE(*this, BasicDecimal128(0));
 
   if (high_bits() == 0) {
-    return bit_util::CountLeadingZeros(low_bits()) + 64;
+    return std::countl_zero(low_bits()) + 64;
   } else {
-    return bit_util::CountLeadingZeros(static_cast<uint64_t>(high_bits()));
+    return std::countl_zero(static_cast<uint64_t>(high_bits()));
   }
 }
 
diff --git a/cpp/src/arrow/util/bit_block_counter.h 
b/cpp/src/arrow/util/bit_block_counter.h
index 73a1ee8600..82651a9d38 100644
--- a/cpp/src/arrow/util/bit_block_counter.h
+++ b/cpp/src/arrow/util/bit_block_counter.h
@@ -18,6 +18,7 @@
 #pragma once
 
 #include <algorithm>
+#include <bit>
 #include <cstdint>
 #include <limits>
 #include <memory>
@@ -130,10 +131,10 @@ class ARROW_EXPORT BitBlockCounter {
       if (bits_remaining_ < kFourWordsBits) {
         return GetBlockSlow(kFourWordsBits);
       }
-      total_popcount += bit_util::PopCount(LoadWord(bitmap_));
-      total_popcount += bit_util::PopCount(LoadWord(bitmap_ + 8));
-      total_popcount += bit_util::PopCount(LoadWord(bitmap_ + 16));
-      total_popcount += bit_util::PopCount(LoadWord(bitmap_ + 24));
+      total_popcount += std::popcount(LoadWord(bitmap_));
+      total_popcount += std::popcount(LoadWord(bitmap_ + 8));
+      total_popcount += std::popcount(LoadWord(bitmap_ + 16));
+      total_popcount += std::popcount(LoadWord(bitmap_ + 24));
     } else {
       // When the offset is > 0, we need there to be a word beyond the last
       // aligned word in the bitmap for the bit shifting logic.
@@ -142,16 +143,16 @@ class ARROW_EXPORT BitBlockCounter {
       }
       auto current = LoadWord(bitmap_);
       auto next = LoadWord(bitmap_ + 8);
-      total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
+      total_popcount += std::popcount(ShiftWord(current, next, offset_));
       current = next;
       next = LoadWord(bitmap_ + 16);
-      total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
+      total_popcount += std::popcount(ShiftWord(current, next, offset_));
       current = next;
       next = LoadWord(bitmap_ + 24);
-      total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
+      total_popcount += std::popcount(ShiftWord(current, next, offset_));
       current = next;
       next = LoadWord(bitmap_ + 32);
-      total_popcount += bit_util::PopCount(ShiftWord(current, next, offset_));
+      total_popcount += std::popcount(ShiftWord(current, next, offset_));
     }
     bitmap_ += bit_util::BytesForBits(kFourWordsBits);
     bits_remaining_ -= kFourWordsBits;
@@ -175,15 +176,15 @@ class ARROW_EXPORT BitBlockCounter {
       if (bits_remaining_ < kWordBits) {
         return GetBlockSlow(kWordBits);
       }
-      popcount = bit_util::PopCount(LoadWord(bitmap_));
+      popcount = std::popcount(LoadWord(bitmap_));
     } else {
       // When the offset is > 0, we need there to be a word beyond the last
       // aligned word in the bitmap for the bit shifting logic.
       if (bits_remaining_ < 2 * kWordBits - offset_) {
         return GetBlockSlow(kWordBits);
       }
-      popcount = bit_util::PopCount(
-          ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), offset_));
+      popcount =
+          std::popcount(ShiftWord(LoadWord(bitmap_), LoadWord(bitmap_ + 8), 
offset_));
     }
     bitmap_ += kWordBits / 8;
     bits_remaining_ -= kWordBits;
@@ -318,14 +319,13 @@ class ARROW_EXPORT BinaryBitBlockCounter {
 
     int64_t popcount = 0;
     if (left_offset_ == 0 && right_offset_ == 0) {
-      popcount =
-          bit_util::PopCount(Op::Call(LoadWord(left_bitmap_), 
LoadWord(right_bitmap_)));
+      popcount = std::popcount(Op::Call(LoadWord(left_bitmap_), 
LoadWord(right_bitmap_)));
     } else {
       auto left_word =
           ShiftWord(LoadWord(left_bitmap_), LoadWord(left_bitmap_ + 8), 
left_offset_);
       auto right_word =
           ShiftWord(LoadWord(right_bitmap_), LoadWord(right_bitmap_ + 8), 
right_offset_);
-      popcount = bit_util::PopCount(Op::Call(left_word, right_word));
+      popcount = std::popcount(Op::Call(left_word, right_word));
     }
     left_bitmap_ += kWordBits / 8;
     right_bitmap_ += kWordBits / 8;
diff --git a/cpp/src/arrow/util/bit_run_reader.h 
b/cpp/src/arrow/util/bit_run_reader.h
index 7bb0014027..1a9638880c 100644
--- a/cpp/src/arrow/util/bit_run_reader.h
+++ b/cpp/src/arrow/util/bit_run_reader.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <bit>
 #include <cassert>
 #include <cstdint>
 #include <cstring>
@@ -93,7 +94,7 @@ class ARROW_EXPORT BitRunReader {
       return {/*length=*/0, false};
     }
     // This implementation relies on a efficient implementations of
-    // CountTrailingZeros and assumes that runs are more often then
+    // std::countr_zero and assumes that runs are more often then
     // not.  The logic is to incrementally find the next bit change
     // from the current position.  This is done by zeroing all
     // bits in word_ up to position_ and using the TrailingZeroCount
@@ -104,12 +105,12 @@ class ARROW_EXPORT BitRunReader {
 
     int64_t start_position = position_;
     int64_t start_bit_offset = start_position & 63;
-    // Invert the word for proper use of CountTrailingZeros and
-    // clear bits so CountTrailingZeros can do it magic.
+    // Invert the word for proper use of std::countr_zero and
+    // clear bits so std::countr_zero can do it magic.
     word_ = ~word_ & 
~bit_util::LeastSignificantBitMask<uint64_t>(start_bit_offset);
 
     // Go  forward until the next change from unset to set.
-    int64_t new_bits = bit_util::CountTrailingZeros(word_) - start_bit_offset;
+    int64_t new_bits = std::countr_zero(word_) - start_bit_offset;
     position_ += new_bits;
 
     if (ARROW_PREDICT_FALSE(bit_util::IsMultipleOf64(position_)) &&
@@ -129,7 +130,7 @@ class ARROW_EXPORT BitRunReader {
       // Advance the position of the bitmap for loading.
       bitmap_ += sizeof(uint64_t);
       LoadNextWord();
-      new_bits = bit_util::CountTrailingZeros(word_);
+      new_bits = std::countr_zero(word_);
       // Continue calculating run length.
       position_ += new_bits;
     } while (ARROW_PREDICT_FALSE(bit_util::IsMultipleOf64(position_)) &&
@@ -155,9 +156,9 @@ class ARROW_EXPORT BitRunReader {
     }
 
     // Two cases:
-    //   1. For unset, CountTrailingZeros works naturally so we don't
+    //   1. For unset, std::countr_zero works naturally so we don't
     //   invert the word.
-    //   2. Otherwise invert so we can use CountTrailingZeros.
+    //   2. Otherwise invert so we can use std::countr_zero.
     if (current_run_bit_set_) {
       word_ = ~word_;
     }
@@ -438,12 +439,12 @@ class BaseSetBitRunReader {
 
 template <>
 inline int BaseSetBitRunReader<false>::CountFirstZeros(uint64_t word) {
-  return bit_util::CountTrailingZeros(word);
+  return std::countr_zero(word);
 }
 
 template <>
 inline int BaseSetBitRunReader<true>::CountFirstZeros(uint64_t word) {
-  return bit_util::CountLeadingZeros(word);
+  return std::countl_zero(word);
 }
 
 template <>
diff --git a/cpp/src/arrow/util/bit_util.h b/cpp/src/arrow/util/bit_util.h
index c7849db871..0d2b2655ea 100644
--- a/cpp/src/arrow/util/bit_util.h
+++ b/cpp/src/arrow/util/bit_util.h
@@ -17,20 +17,7 @@
 
 #pragma once
 
-#if defined(_MSC_VER)
-#  if defined(_M_AMD64) || defined(_M_X64)
-#    include <intrin.h>  // IWYU pragma: keep
-#  endif
-
-#  pragma intrinsic(_BitScanReverse)
-#  pragma intrinsic(_BitScanForward)
-#  define ARROW_POPCOUNT64 __popcnt64
-#  define ARROW_POPCOUNT32 __popcnt
-#else
-#  define ARROW_POPCOUNT64 __builtin_popcountll
-#  define ARROW_POPCOUNT32 __builtin_popcount
-#endif
-
+#include <bit>
 #include <cstdint>
 #include <type_traits>
 
@@ -49,26 +36,6 @@ typename std::make_unsigned<Integer>::type 
as_unsigned(Integer x) {
 
 namespace bit_util {
 
-// The number of set bits in a given unsigned byte value, pre-computed
-//
-// Generated with the following Python code
-// output = 'static constexpr uint8_t kBytePopcount[] = {{{0}}};'
-// popcounts = [str(bin(i).count('1')) for i in range(0, 256)]
-// print(output.format(', '.join(popcounts)))
-static constexpr uint8_t kBytePopcount[] = {
-    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 
3, 3, 4, 3,
-    4, 4, 5, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 
4, 5, 3, 4,
-    4, 5, 4, 5, 5, 6, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 
4, 3, 4, 4,
-    5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 
3, 4, 4, 5,
-    4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 
4, 4, 5, 2,
-    3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 
4, 5, 4, 5,
-    5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 2, 3, 3, 4, 3, 4, 4, 
5, 3, 4, 4,
-    5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 3, 4, 4, 5, 
4, 5, 5, 6,
-    4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8};
-
-static inline uint64_t PopCount(uint64_t bitmap) { return 
ARROW_POPCOUNT64(bitmap); }
-static inline uint32_t PopCount(uint32_t bitmap) { return 
ARROW_POPCOUNT32(bitmap); }
-
 //
 // Bit-related computations on integer values
 //
@@ -84,14 +51,6 @@ constexpr int64_t BytesForBits(int64_t bits) {
   return (bits >> 3) + ((bits & 7) != 0);
 }
 
-constexpr bool IsPowerOf2(int64_t value) {
-  return value > 0 && (value & (value - 1)) == 0;
-}
-
-constexpr bool IsPowerOf2(uint64_t value) {
-  return value > 0 && (value & (value - 1)) == 0;
-}
-
 // Returns the smallest power of two that contains v.  If v is already a
 // power of two, it is returned as is.
 static inline int64_t NextPower2(int64_t n) {
@@ -140,13 +99,10 @@ constexpr int64_t RoundDown(int64_t value, int64_t factor) {
 // The result is undefined on overflow, i.e. if `value > 2**64 - factor`,
 // since we cannot return the correct result which would be 2**64.
 constexpr int64_t RoundUpToPowerOf2(int64_t value, int64_t factor) {
-  // DCHECK(value >= 0);
-  // DCHECK(IsPowerOf2(factor));
   return (value + (factor - 1)) & ~(factor - 1);
 }
 
 constexpr uint64_t RoundUpToPowerOf2(uint64_t value, uint64_t factor) {
-  // DCHECK(IsPowerOf2(factor));
   return (value + (factor - 1)) & ~(factor - 1);
 }
 
@@ -179,106 +135,10 @@ static inline uint64_t TrailingBits(uint64_t v, int num_bits) {
   return (v << n) >> n;
 }
 
-/// \brief Count the number of leading zeros in an unsigned integer.
-static inline int CountLeadingZeros(uint32_t value) {
-#if defined(__clang__) || defined(__GNUC__)
-  if (value == 0) return 32;
-  return static_cast<int>(__builtin_clz(value));
-#elif defined(_MSC_VER)
-  unsigned long index;                                               // NOLINT
-  if (_BitScanReverse(&index, static_cast<unsigned long>(value))) {  // NOLINT
-    return 31 - static_cast<int>(index);
-  } else {
-    return 32;
-  }
-#else
-  int bitpos = 0;
-  while (value != 0) {
-    value >>= 1;
-    ++bitpos;
-  }
-  return 32 - bitpos;
-#endif
-}
-
-static inline int CountLeadingZeros(uint64_t value) {
-#if defined(__clang__) || defined(__GNUC__)
-  if (value == 0) return 64;
-  return static_cast<int>(__builtin_clzll(value));
-#elif defined(_MSC_VER)
-  unsigned long index;                     // NOLINT
-  if (_BitScanReverse64(&index, value)) {  // NOLINT
-    return 63 - static_cast<int>(index);
-  } else {
-    return 64;
-  }
-#else
-  int bitpos = 0;
-  while (value != 0) {
-    value >>= 1;
-    ++bitpos;
-  }
-  return 64 - bitpos;
-#endif
-}
-
-static inline int CountTrailingZeros(uint32_t value) {
-#if defined(__clang__) || defined(__GNUC__)
-  if (value == 0) return 32;
-  return static_cast<int>(__builtin_ctzl(value));
-#elif defined(_MSC_VER)
-  unsigned long index;  // NOLINT
-  if (_BitScanForward(&index, value)) {
-    return static_cast<int>(index);
-  } else {
-    return 32;
-  }
-#else
-  int bitpos = 0;
-  if (value) {
-    while (value & 1 == 0) {
-      value >>= 1;
-      ++bitpos;
-    }
-  } else {
-    bitpos = 32;
-  }
-  return bitpos;
-#endif
-}
-
-static inline int CountTrailingZeros(uint64_t value) {
-#if defined(__clang__) || defined(__GNUC__)
-  if (value == 0) return 64;
-  return static_cast<int>(__builtin_ctzll(value));
-#elif defined(_MSC_VER)
-  unsigned long index;  // NOLINT
-  if (_BitScanForward64(&index, value)) {
-    return static_cast<int>(index);
-  } else {
-    return 64;
-  }
-#else
-  int bitpos = 0;
-  if (value) {
-    while (value & 1 == 0) {
-      value >>= 1;
-      ++bitpos;
-    }
-  } else {
-    bitpos = 64;
-  }
-  return bitpos;
-#endif
-}
-
-// Returns the minimum number of bits needed to represent an unsigned value
-static inline int NumRequiredBits(uint64_t x) { return 64 - CountLeadingZeros(x); }
-
 // Returns ceil(log2(x)).
 static inline int Log2(uint64_t x) {
   // DCHECK_GT(x, 0);
-  return NumRequiredBits(x - 1);
+  return std::bit_width(x - 1);
 }
 
 //
diff --git a/cpp/src/arrow/util/bit_util_test.cc b/cpp/src/arrow/util/bit_util_test.cc
index 13aa319d70..1e7714540e 100644
--- a/cpp/src/arrow/util/bit_util_test.cc
+++ b/cpp/src/arrow/util/bit_util_test.cc
@@ -739,79 +739,10 @@ TEST(BitUtil, Log2) {
   EXPECT_EQ(bit_util::Log2(ULLONG_MAX), 64);
 }
 
-TEST(BitUtil, NumRequiredBits) {
-  EXPECT_EQ(bit_util::NumRequiredBits(0), 0);
-  EXPECT_EQ(bit_util::NumRequiredBits(1), 1);
-  EXPECT_EQ(bit_util::NumRequiredBits(2), 2);
-  EXPECT_EQ(bit_util::NumRequiredBits(3), 2);
-  EXPECT_EQ(bit_util::NumRequiredBits(4), 3);
-  EXPECT_EQ(bit_util::NumRequiredBits(5), 3);
-  EXPECT_EQ(bit_util::NumRequiredBits(7), 3);
-  EXPECT_EQ(bit_util::NumRequiredBits(8), 4);
-  EXPECT_EQ(bit_util::NumRequiredBits(9), 4);
-  EXPECT_EQ(bit_util::NumRequiredBits(UINT_MAX - 1), 32);
-  EXPECT_EQ(bit_util::NumRequiredBits(UINT_MAX), 32);
-  EXPECT_EQ(bit_util::NumRequiredBits(static_cast<uint64_t>(UINT_MAX) + 1), 33);
-  EXPECT_EQ(bit_util::NumRequiredBits(ULLONG_MAX / 2), 63);
-  EXPECT_EQ(bit_util::NumRequiredBits(ULLONG_MAX / 2 + 1), 64);
-  EXPECT_EQ(bit_util::NumRequiredBits(ULLONG_MAX - 1), 64);
-  EXPECT_EQ(bit_util::NumRequiredBits(ULLONG_MAX), 64);
-}
-
 #define U32(x) static_cast<uint32_t>(x)
 #define U64(x) static_cast<uint64_t>(x)
 #define S64(x) static_cast<int64_t>(x)
 
-TEST(BitUtil, CountLeadingZeros) {
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(0)), 32);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(1)), 31);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(2)), 30);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(3)), 30);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(4)), 29);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(7)), 29);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(8)), 28);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(UINT_MAX / 2)), 1);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(UINT_MAX / 2 + 1)), 0);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U32(UINT_MAX)), 0);
-
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(0)), 64);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(1)), 63);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(2)), 62);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(3)), 62);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(4)), 61);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(7)), 61);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(8)), 60);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(UINT_MAX)), 32);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(UINT_MAX) + 1), 31);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(ULLONG_MAX / 2)), 1);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(ULLONG_MAX / 2 + 1)), 0);
-  EXPECT_EQ(bit_util::CountLeadingZeros(U64(ULLONG_MAX)), 0);
-}
-
-TEST(BitUtil, CountTrailingZeros) {
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(0)), 32);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(1) << 31), 31);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(1) << 30), 30);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(1) << 29), 29);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(1) << 28), 28);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(8)), 3);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(4)), 2);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(2)), 1);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(1)), 0);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U32(ULONG_MAX)), 0);
-
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(0)), 64);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(1) << 63), 63);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(1) << 62), 62);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(1) << 61), 61);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(1) << 60), 60);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(8)), 3);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(4)), 2);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(2)), 1);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(1)), 0);
-  EXPECT_EQ(bit_util::CountTrailingZeros(U64(ULLONG_MAX)), 0);
-}
-
 TEST(BitUtil, RoundUpToPowerOf2) {
   EXPECT_EQ(bit_util::RoundUpToPowerOf2(S64(7), 8), 8);
   EXPECT_EQ(bit_util::RoundUpToPowerOf2(S64(8), 8), 8);
diff --git a/cpp/src/arrow/util/bitmap_ops.cc b/cpp/src/arrow/util/bitmap_ops.cc
index ce2224f2f6..cc24146ae9 100644
--- a/cpp/src/arrow/util/bitmap_ops.cc
+++ b/cpp/src/arrow/util/bitmap_ops.cc
@@ -61,10 +61,10 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
     // Unroll the loop for better performance
     for (int64_t i = 0; i < words_rounded; i += kCountUnrollFactor) {
       // (hand-unrolled as some gcc versions would unnest a nested `for` loop)
-      count_unroll[0] += bit_util::PopCount(u64_data[0]);
-      count_unroll[1] += bit_util::PopCount(u64_data[1]);
-      count_unroll[2] += bit_util::PopCount(u64_data[2]);
-      count_unroll[3] += bit_util::PopCount(u64_data[3]);
+      count_unroll[0] += std::popcount(u64_data[0]);
+      count_unroll[1] += std::popcount(u64_data[1]);
+      count_unroll[2] += std::popcount(u64_data[2]);
+      count_unroll[3] += std::popcount(u64_data[3]);
       u64_data += kCountUnrollFactor;
     }
     for (int64_t k = 0; k < kCountUnrollFactor; k++) {
@@ -73,7 +73,7 @@ int64_t CountSetBits(const uint8_t* data, int64_t bit_offset, int64_t length) {
 
     // The trailing part
     for (; u64_data < end; ++u64_data) {
-      count += bit_util::PopCount(*u64_data);
+      count += std::popcount(*u64_data);
     }
   }
 
diff --git a/cpp/src/arrow/util/bitmap_reader_benchmark.cc b/cpp/src/arrow/util/bitmap_reader_benchmark.cc
index b3c199ec3b..3563ba75ad 100644
--- a/cpp/src/arrow/util/bitmap_reader_benchmark.cc
+++ b/cpp/src/arrow/util/bitmap_reader_benchmark.cc
@@ -15,6 +15,7 @@
 // specific language governing permissions and limitations
 // under the License.
 
+#include <bit>
 #include <bitset>
 #include <cstdint>
 #include <cstdlib>
@@ -88,9 +89,9 @@ static void BitmapWordReaderBench(benchmark::State& state) {
       //      if (word == UINT64_MAX) {
       //        set_bits += sizeof(uint64_t) * 8;
       //      } else if (word) {
-      //        set_bits += PopCount(word);
+      //        set_bits += std::popcount(word);
       //      }
-      set_bits += PopCount(word);
+      set_bits += std::popcount(word);
       benchmark::DoNotOptimize(set_bits);
     }
 
@@ -98,7 +99,7 @@ static void BitmapWordReaderBench(benchmark::State& state) {
     while (cnt--) {
       int valid_bits;
      const auto& byte = static_cast<uint32_t>(counter.NextTrailingByte(valid_bits));
-      set_bits += PopCount(kPrecedingBitmask[valid_bits] & byte);
+      set_bits += std::popcount(kPrecedingBitmask[valid_bits] & byte);
       benchmark::DoNotOptimize(set_bits);
     }
     benchmark::ClobberMemory();
diff --git a/cpp/src/arrow/util/bitmap_writer.h b/cpp/src/arrow/util/bitmap_writer.h
index c9ce8012f3..8c47793fde 100644
--- a/cpp/src/arrow/util/bitmap_writer.h
+++ b/cpp/src/arrow/util/bitmap_writer.h
@@ -17,6 +17,7 @@
 
 #pragma once
 
+#include <bit>
 #include <cstdint>
 #include <cstring>
 
@@ -112,7 +113,7 @@ class FirstTimeBitmapWriter {
 
     // Update state variables except for current_byte_ here.
     position_ += number_of_bits;
-    int64_t bit_offset = bit_util::CountTrailingZeros(static_cast<uint32_t>(bit_mask_));
+    int64_t bit_offset = std::countr_zero(static_cast<uint32_t>(bit_mask_));
     bit_mask_ = bit_util::kBitmask[(bit_offset + number_of_bits) % 8];
     byte_offset_ += (bit_offset + number_of_bits) / 8;
 
diff --git a/cpp/src/arrow/util/rle_encoding_test.cc b/cpp/src/arrow/util/rle_encoding_test.cc
index b2d4f7df6f..f77f91f6cd 100644
--- a/cpp/src/arrow/util/rle_encoding_test.cc
+++ b/cpp/src/arrow/util/rle_encoding_test.cc
@@ -17,6 +17,7 @@
 
 // From Apache Impala (incubating) as of 2016-01-29
 
+#include <bit>
 #include <cstdint>
 #include <cstring>
 #include <random>
@@ -912,7 +913,7 @@ TEST(BitRle, Random) {
       }
       parity = !parity;
     }
-    if (!CheckRoundTrip(values, bit_util::NumRequiredBits(values.size()))) {
+    if (!CheckRoundTrip(values, std::bit_width(values.size()))) {
       FAIL() << "failing seed: " << seed;
     }
   }
diff --git a/cpp/src/gandiva/selection_vector.cc b/cpp/src/gandiva/selection_vector.cc
index 8d5f9f4210..0d8ecb66b7 100644
--- a/cpp/src/gandiva/selection_vector.cc
+++ b/cpp/src/gandiva/selection_vector.cc
@@ -17,6 +17,7 @@
 
 #include "gandiva/selection_vector.h"
 
+#include <bit>
 #include <memory>
 #include <sstream>
 #include <utility>
@@ -64,7 +65,7 @@ Status SelectionVector::PopulateFromBitMap(const uint8_t* bitmap, int64_t bitmap
 #  pragma warning(pop)
 #endif
 
-      int pos_in_word = arrow::bit_util::CountTrailingZeros(highest_only);
+      int pos_in_word = std::countr_zero(highest_only);
 
       int64_t pos_in_bitmap = bitmap_idx * 64 + pos_in_word;
       if (pos_in_bitmap > max_bitmap_index) {
diff --git a/cpp/src/parquet/chunker_internal.cc b/cpp/src/parquet/chunker_internal.cc
index cc0a386f4c..5cd31f8334 100644
--- a/cpp/src/parquet/chunker_internal.cc
+++ b/cpp/src/parquet/chunker_internal.cc
@@ -17,6 +17,7 @@
 
 #include "parquet/chunker_internal.h"
 
+#include <bit>
 #include <cstdint>
 #include <iterator>
 #include <string>
@@ -85,7 +86,8 @@ uint64_t CalculateMask(int64_t min_chunk_size, int64_t max_chunk_size, int norm_
 
  // assuming that the gear hash has a uniform distribution, we can calculate the mask
  // by taking the floor(log2(target_size))
-  int mask_bits = std::max(0, ::arrow::bit_util::NumRequiredBits(target_size) - 1);
+  auto target_bits = std::bit_width(static_cast<uint64_t>(target_size));
+  int mask_bits = target_bits == 0 ? 0 : static_cast<int>(target_bits - 1);
 
  // a user defined `norm_level` can be used to adjust the mask size, hence the matching
  // probability, by increasing the norm_level we increase the probability of matching
diff --git a/cpp/src/parquet/encoder.cc b/cpp/src/parquet/encoder.cc
index 0e8c0ba32b..97a5d77d41 100644
--- a/cpp/src/parquet/encoder.cc
+++ b/cpp/src/parquet/encoder.cc
@@ -18,6 +18,7 @@
 #include "parquet/encoding.h"
 
 #include <algorithm>
+#include <bit>
 #include <cstdint>
 #include <cstdlib>
 #include <limits>
@@ -1164,8 +1165,8 @@ void DeltaBitPackEncoder<DType>::FlushBlock() {
 
    // The minimum number of bits required to write any of values in deltas_ vector.
     // See overflow comment above.
-    const auto bit_width = bit_width_data[i] = bit_util::NumRequiredBits(
-        static_cast<UT>(max_delta) - static_cast<UT>(min_delta));
+    const auto bit_width = bit_width_data[i] =
+        std::bit_width(static_cast<UT>(max_delta) - static_cast<UT>(min_delta));
 
     for (uint32_t j = start; j < start + values_current_mini_block; j++) {
       // Convert delta to frame of reference. See overflow comment above.
diff --git a/cpp/src/parquet/level_conversion_inc.h b/cpp/src/parquet/level_conversion_inc.h
index 335f5b9215..33b4fae084 100644
--- a/cpp/src/parquet/level_conversion_inc.h
+++ b/cpp/src/parquet/level_conversion_inc.h
@@ -19,6 +19,7 @@
 #include "parquet/level_conversion.h"
 
 #include <algorithm>
+#include <bit>
 #include <cstdint>
 #include <limits>
 
@@ -248,7 +249,7 @@ inline uint64_t ExtractBitsSoftware(uint64_t bitmap, uint64_t select_bitmap) {
   int bit_len = 0;
   constexpr uint8_t kLookupMask = (1U << kLookupBits) - 1;
   while (select_bitmap != 0) {
-    const auto mask_len = ARROW_POPCOUNT32(select_bitmap & kLookupMask);
+    const auto mask_len = std::popcount(select_bitmap & kLookupMask);
    const uint64_t value = kPextTable[select_bitmap & kLookupMask][bitmap & kLookupMask];
     bit_value |= (value << bit_len);
     bit_len += mask_len;
@@ -309,12 +310,12 @@ int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_si
         ::arrow::bit_util::FromLittleEndian(internal::GreaterThanBitmap(
            def_levels, batch_size, level_info.repeated_ancestor_def_level - 1)));
     auto selected_bits = ExtractBits(defined_bitmap, present_bitmap);
-    int64_t selected_count = ::arrow::bit_util::PopCount(present_bitmap);
+    int64_t selected_count = std::popcount(present_bitmap);
     if (ARROW_PREDICT_FALSE(selected_count > upper_bound_remaining)) {
       throw ParquetException("Values read exceeded upper bound");
     }
     writer->AppendWord(selected_bits, selected_count);
-    return ::arrow::bit_util::PopCount(selected_bits);
+    return std::popcount(selected_bits);
   } else {
     if (ARROW_PREDICT_FALSE(batch_size > upper_bound_remaining)) {
       std::stringstream ss;
@@ -323,7 +324,7 @@ int64_t DefLevelsBatchToBitmap(const int16_t* def_levels, const int64_t batch_si
     }
 
     writer->AppendWord(defined_bitmap, batch_size);
-    return ::arrow::bit_util::PopCount(defined_bitmap);
+    return std::popcount(defined_bitmap);
   }
 }
 


Reply via email to