https://github.com/jhuber6 created https://github.com/llvm/llvm-project/pull/189387
Summary: The `ull` suffix can mean 128 bits on some architectures. Replace this with the `stdint.h` constructor to be certain. From 889d8222375da00fccb196450805c003717b7300 Mon Sep 17 00:00:00 2001 From: Joseph Huber <[email protected]> Date: Mon, 30 Mar 2026 09:03:14 -0500 Subject: [PATCH] [Clang] Fix constant bit widths in gpuintrin.h Summary: The `ull` suffix can mean 128 bits on some architectures. Replace this with the `stdint.h` constructor to be certain. --- clang/lib/Headers/gpuintrin.h | 25 ++++++++++++------------- clang/lib/Headers/nvptxintrin.h | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index ef1446a3ac77b..12176847776be 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -147,11 +147,10 @@ __gpu_is_first_in_lane(uint64_t __lane_mask) { // Copies the value from the first active thread to the rest. _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x) { - uint32_t __hi = (uint32_t)(__x >> 32ull); - uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFFull); - return ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __hi) << 32ull) | - ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __lo) & - 0xFFFFFFFFull); + uint32_t __hi = (uint32_t)(__x >> 32); + uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); + return ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __hi) << 32) | + ((uint64_t)__gpu_read_first_lane_u32(__lane_mask, __lo) & 0xFFFFFFFF); } // Gets the first floating point value from the active lanes. 
@@ -174,11 +173,10 @@ __gpu_read_first_lane_f64(uint64_t __lane_mask, double __x) { _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, uint32_t __width) { - uint32_t __hi = (uint32_t)(__x >> 32ull); + uint32_t __hi = (uint32_t)(__x >> 32); uint32_t __lo = (uint32_t)(__x & 0xFFFFFFFF); uint32_t __mask = (uint32_t)__lane_mask; - return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width) - << 32ull) | + return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width) << 32) | ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width)); } @@ -211,7 +209,7 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x, _DEFAULT_FN_ATTRS static __inline__ __type \ __gpu_suffix_scan_##__prefix##_##__suffix(uint64_t __lane_mask, \ __type __x) { \ - uint64_t __above = __lane_mask & -(2ull << __gpu_lane_id()); \ + uint64_t __above = __lane_mask & -(UINT64_C(2) << __gpu_lane_id()); \ for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ uint32_t __src = __above ? __builtin_ctzg(__above) : __gpu_lane_id(); \ __type __result = __gpu_shuffle_idx_##__suffix(__lane_mask, __src, __x, \ @@ -226,7 +224,7 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x, _DEFAULT_FN_ATTRS static __inline__ __type \ __gpu_prefix_scan_##__prefix##_##__suffix(uint64_t __lane_mask, \ __type __x) { \ - uint64_t __below = __lane_mask & ((1ull << __gpu_lane_id()) - 1); \ + uint64_t __below = __lane_mask & ((UINT64_C(1) << __gpu_lane_id()) - 1); \ for (uint32_t __step = 1; __step < __gpu_num_lanes(); __step *= 2) { \ uint32_t __src = \ __below ? (63 - __builtin_clzg(__below)) : __gpu_lane_id(); \ @@ -234,7 +232,8 @@ __gpu_shuffle_idx_f64(uint64_t __lane_mask, uint32_t __idx, double __x, __gpu_num_lanes()); \ __x = __op(__x, __below ? 
__result : (__type)__identity); \ for (uint32_t __i = 0; __i < __step; ++__i) \ - __below ^= (1ull << (63 - __builtin_clzg(__below, 0))) & __below; \ + __below ^= \ + (UINT64_C(1) << (63 - __builtin_clzg(__below, 0))) & __below; \ } \ return __x; \ } \ @@ -338,7 +337,7 @@ __gpu_match_all_u32_impl(uint64_t __lane_mask, uint32_t __x) { uint32_t __first = __gpu_shuffle_idx_u32( __lane_mask, __builtin_ctzg(__lane_mask), __x, __gpu_num_lanes()); uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first); - return __ballot == __lane_mask ? __lane_mask : 0ull; + return __ballot == __lane_mask ? __lane_mask : UINT64_C(0); } // Returns the current lane mask if every lane contains __x. @@ -347,7 +346,7 @@ __gpu_match_all_u64_impl(uint64_t __lane_mask, uint64_t __x) { uint64_t __first = __gpu_shuffle_idx_u64( __lane_mask, __builtin_ctzg(__lane_mask), __x, __gpu_num_lanes()); uint64_t __ballot = __gpu_ballot(__lane_mask, __x == __first); - return __ballot == __lane_mask ? __lane_mask : 0ull; + return __ballot == __lane_mask ? __lane_mask : UINT64_C(0); } _Pragma("omp end declare variant"); diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index b2e538580ba10..57a6a2cd08633 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -137,7 +137,7 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, uint32_t __width) { // Mask out inactive lanes to match AMDGPU behavior. uint32_t __mask = (uint32_t)__lane_mask; - bool __bitmask = (1ull << __idx) & __lane_mask; + bool __bitmask = (UINT64_C(1) << __idx) & __lane_mask; return -__bitmask & __nvvm_shfl_sync_idx_i32(__mask, __x, __idx, ((__gpu_num_lanes() - __width) << 8u) | 0x1f); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
