[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
https://github.com/JonChesterfield updated https://github.com/llvm/llvm-project/pull/131134 >From 0466c31d1e0b10aa2d2352bb6befd36eb5306f9c Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 13 Mar 2025 12:49:42 + Subject: [PATCH] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers --- clang/lib/Headers/amdgpuintrin.h | 22 ++ clang/lib/Headers/gpuintrin.h| 26 +++--- clang/lib/Headers/nvptxintrin.h | 27 --- 3 files changed, 25 insertions(+), 50 deletions(-) diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index 839a05175cf3e..0c543ef7d3659 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -13,11 +13,8 @@ #error "This file is intended for AMDGPU targets or offloading to AMDGPU" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -146,17 +143,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, return __builtin_amdgcn_ds_bpermute(__lane << 2, __x); } -// Shuffles the the lanes inside the wavefront according to the given index. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, - uint32_t __width) { - uint32_t __hi = (uint32_t)(__x >> 32ull); - uint32_t __lo = (uint32_t)(__x & 0x); - return ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __hi, __width) - << 32ull) | - ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __lo, __width)); -} - // Returns a bitmask marking all lanes that have the same value of __x. 
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { @@ -238,8 +224,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); -#endif - #endif // __AMDGPUINTRIN_H diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index 4181628d18048..ac79d685337c5 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -25,6 +25,13 @@ #endif #endif +#include + +#if !defined(__cplusplus) +_Pragma("push_macro(\"bool\")"); +#define bool _Bool +#endif + #if defined(__NVPTX__) #include #elif defined(__AMDGPU__) @@ -33,13 +40,6 @@ #error "This header is only meant to be used on GPU architectures." #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool -#endif - _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); @@ -141,6 +141,18 @@ __gpu_read_first_lane_f64(uint64_t __lane_mask, double __x) { __builtin_bit_cast(uint64_t, __x))); } +// Shuffles the the lanes according to the given index. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, + uint32_t __width) { + uint32_t __hi = (uint32_t)(__x >> 32ull); + uint32_t __lo = (uint32_t)(__x & 0x); + uint32_t __mask = (uint32_t)__lane_mask; + return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width) + << 32ull) | + ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width)); +} + // Shuffles the the lanes according to the given index. 
_DEFAULT_FN_ATTRS static __inline__ float __gpu_shuffle_idx_f32(uint64_t __lane_mask, uint32_t __idx, float __x, diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index d00a5f6de3950..4c63bc99c0bf2 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -13,15 +13,12 @@ #error "This file is intended for NVPTX targets or offloading to NVPTX" #endif -#ifndef __CUDA_ARCH__ -#define __CUDA_ARCH__ 0 +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __CUDA_ARCH__ +#define __CUDA_ARCH__ 0 #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -153,18 +150,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, ((__gpu_num_lanes() - __width) << 8u) | 0x1f); } -// Shuffles the the lanes inside the warp according to the given index. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, - uint32_t __width) { - uint32_t __hi = (uint32_t)(__x >> 32ull); - uint32_t
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
https://github.com/JonChesterfield edited https://github.com/llvm/llvm-project/pull/131134 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); jhuber6 wrote: Where did this go? https://github.com/llvm/llvm-project/pull/131134 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/131134 Adds macro guards to warn if the implementation headers are included directly as part of dropping the need for them to be standalone. I'd like to declare functions before the include but it might not be viable with the openmp pragma annotation to do so. >From f0149fdf6d8fcf60b128bef8aacf299e846cc4a8 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 13 Mar 2025 12:49:42 + Subject: [PATCH] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers --- clang/lib/Headers/amdgpuintrin.h | 15 ++- clang/lib/Headers/gpuintrin.h| 20 ++-- clang/lib/Headers/nvptxintrin.h | 19 --- 3 files changed, 20 insertions(+), 34 deletions(-) diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index 839a05175cf3e..7b1d16f8ca88d 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -13,11 +13,8 @@ #error "This file is intended for AMDGPU targets or offloading to AMDGPU" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -33,10 +30,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})"); // Attribute to declare a function as a kernel. #define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected"))) -// Defined in gpuintrin.h, used later in this file. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); - // Returns the number of workgroups in the 'x' dimension of the grid. 
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); @@ -238,8 +231,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); -#endif - #endif // __AMDGPUINTRIN_H diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index 4181628d18048..8d300b5b9acb8 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -25,6 +25,20 @@ #endif #endif +#include + +#if !defined(__cplusplus) +_Pragma("push_macro(\"bool\")"); +#define bool _Bool +#endif + +// Declare functions that can be called by the implementation headers + +// Returns the number of workgroups in the 'x' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); + + #if defined(__NVPTX__) #include #elif defined(__AMDGPU__) @@ -33,12 +47,6 @@ #error "This header is only meant to be used on GPU architectures." 
#endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool -#endif _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index d00a5f6de3950..170c943fe63a2 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -13,15 +13,12 @@ #error "This file is intended for NVPTX targets or offloading to NVPTX" #endif -#ifndef __CUDA_ARCH__ -#define __CUDA_ARCH__ 0 +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __CUDA_ARCH__ +#define __CUDA_ARCH__ 0 #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -37,10 +34,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})"); // Attribute to declare a function as a kernel. #define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected"))) -// Defined in gpuintrin.h, used later in this file. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); - // Returns the number of CUDA blocks in the 'x' dimension. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __nvvm_read_ptx_sreg_nctaid_x(); @@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); -#endif - #endif // __NVPTXINTRIN_H ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
github-actions[bot] wrote: :warning: C/C++ code formatter, clang-format found issues in your code. :warning: You can test this locally with the following command: ``bash git-clang-format --diff d3255474be3ea24d876eadb6e97a6424c132b23d f0149fdf6d8fcf60b128bef8aacf299e846cc4a8 --extensions h -- clang/lib/Headers/amdgpuintrin.h clang/lib/Headers/gpuintrin.h clang/lib/Headers/nvptxintrin.h `` View the diff from clang-format here. ``diff diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index 8d300b5b9a..515948b357 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -38,7 +38,6 @@ _Pragma("push_macro(\"bool\")"); _DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); - #if defined(__NVPTX__) #include #elif defined(__AMDGPU__) @@ -47,7 +46,6 @@ __gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); #error "This header is only meant to be used on GPU architectures." #endif - _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); `` https://github.com/llvm/llvm-project/pull/131134 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
@@ -13,11 +13,8 @@ #error "This file is intended for AMDGPU targets or offloading to AMDGPU" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" JonChesterfield wrote: yep, done https://github.com/llvm/llvm-project/pull/131134 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); JonChesterfield wrote: Up into gpuintrin.h https://github.com/llvm/llvm-project/pull/131134 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
https://github.com/JonChesterfield updated https://github.com/llvm/llvm-project/pull/131134 >From 7347ebc6a0aadd1b9676e329bdf7705dbfae7875 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Thu, 13 Mar 2025 12:49:42 + Subject: [PATCH] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers --- clang/lib/Headers/amdgpuintrin.h | 22 ++ clang/lib/Headers/gpuintrin.h| 26 +++--- clang/lib/Headers/nvptxintrin.h | 27 --- 3 files changed, 25 insertions(+), 50 deletions(-) diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index 839a05175cf3e..56748f6c3e818 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -13,11 +13,8 @@ #error "This file is intended for AMDGPU targets or offloading to AMDGPU" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __GPUINTRIN_H +#error "Never use directly; include instead" #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -146,17 +143,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, return __builtin_amdgcn_ds_bpermute(__lane << 2, __x); } -// Shuffles the the lanes inside the wavefront according to the given index. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, - uint32_t __width) { - uint32_t __hi = (uint32_t)(__x >> 32ull); - uint32_t __lo = (uint32_t)(__x & 0x); - return ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __hi, __width) - << 32ull) | - ((uint64_t)__gpu_shuffle_idx_u32(__lane_mask, __idx, __lo, __width)); -} - // Returns a bitmask marking all lanes that have the same value of __x. 
_DEFAULT_FN_ATTRS static __inline__ uint64_t __gpu_match_any_u32(uint64_t __lane_mask, uint32_t __x) { @@ -238,8 +224,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); -#endif - #endif // __AMDGPUINTRIN_H diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index 4181628d18048..ac79d685337c5 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -25,6 +25,13 @@ #endif #endif +#include + +#if !defined(__cplusplus) +_Pragma("push_macro(\"bool\")"); +#define bool _Bool +#endif + #if defined(__NVPTX__) #include #elif defined(__AMDGPU__) @@ -33,13 +40,6 @@ #error "This header is only meant to be used on GPU architectures." #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool -#endif - _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); @@ -141,6 +141,18 @@ __gpu_read_first_lane_f64(uint64_t __lane_mask, double __x) { __builtin_bit_cast(uint64_t, __x))); } +// Shuffles the the lanes according to the given index. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, + uint32_t __width) { + uint32_t __hi = (uint32_t)(__x >> 32ull); + uint32_t __lo = (uint32_t)(__x & 0x); + uint32_t __mask = (uint32_t)__lane_mask; + return ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __hi, __width) + << 32ull) | + ((uint64_t)__gpu_shuffle_idx_u32(__mask, __idx, __lo, __width)); +} + // Shuffles the the lanes according to the given index. 
_DEFAULT_FN_ATTRS static __inline__ float __gpu_shuffle_idx_f32(uint64_t __lane_mask, uint32_t __idx, float __x, diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index d00a5f6de3950..10ad7a682d4cd 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -13,15 +13,12 @@ #error "This file is intended for NVPTX targets or offloading to NVPTX" #endif -#ifndef __CUDA_ARCH__ -#define __CUDA_ARCH__ 0 +#ifndef __GPUINTRIN_H +#error "Never use directly; include instead" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __CUDA_ARCH__ +#define __CUDA_ARCH__ 0 #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -153,18 +150,6 @@ __gpu_shuffle_idx_u32(uint64_t __lane_mask, uint32_t __idx, uint32_t __x, ((__gpu_num_lanes() - __width) << 8u) | 0x1f); } -// Shuffles the the lanes inside the warp according to the given index. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_shuffle_idx_u64(uint64_t __lane_mask, uint32_t __idx, uint64_t __x, - uint32_t __width) { - uint32_t __hi = (uint32_t)(__x >> 32ull); - uint32_t __lo = (uint32_t)(__x & 0x); - uint32_t __mask =
[clang] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers (PR #131134)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Jon Chesterfield (JonChesterfield) Changes Adds macro guards to warn if the implementation headers are included directly as part of dropping the need for them to be standalone. I'd like to declare functions before the include but it might not be viable with the openmp pragma annotation to do so. --- Full diff: https://github.com/llvm/llvm-project/pull/131134.diff 3 Files Affected: - (modified) clang/lib/Headers/amdgpuintrin.h (+2-13) - (modified) clang/lib/Headers/gpuintrin.h (+14-6) - (modified) clang/lib/Headers/nvptxintrin.h (+4-15) ``diff diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h index 839a05175cf3e..7b1d16f8ca88d 100644 --- a/clang/lib/Headers/amdgpuintrin.h +++ b/clang/lib/Headers/amdgpuintrin.h @@ -13,11 +13,8 @@ #error "This file is intended for AMDGPU targets or offloading to AMDGPU" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -33,10 +30,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})"); // Attribute to declare a function as a kernel. #define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected"))) -// Defined in gpuintrin.h, used later in this file. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); - // Returns the number of workgroups in the 'x' dimension of the grid. 
_DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x(); @@ -238,8 +231,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); -#endif - #endif // __AMDGPUINTRIN_H diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h index 4181628d18048..8d300b5b9acb8 100644 --- a/clang/lib/Headers/gpuintrin.h +++ b/clang/lib/Headers/gpuintrin.h @@ -25,6 +25,20 @@ #endif #endif +#include + +#if !defined(__cplusplus) +_Pragma("push_macro(\"bool\")"); +#define bool _Bool +#endif + +// Declare functions that can be called by the implementation headers + +// Returns the number of workgroups in the 'x' dimension of the grid. +_DEFAULT_FN_ATTRS static __inline__ uint64_t +__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); + + #if defined(__NVPTX__) #include #elif defined(__AMDGPU__) @@ -33,12 +47,6 @@ #error "This header is only meant to be used on GPU architectures." 
#endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool -#endif _Pragma("omp begin declare target device_type(nohost)"); _Pragma("omp begin declare variant match(device = {kind(gpu)})"); diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h index d00a5f6de3950..170c943fe63a2 100644 --- a/clang/lib/Headers/nvptxintrin.h +++ b/clang/lib/Headers/nvptxintrin.h @@ -13,15 +13,12 @@ #error "This file is intended for NVPTX targets or offloading to NVPTX" #endif -#ifndef __CUDA_ARCH__ -#define __CUDA_ARCH__ 0 +#ifndef __GPUINTRIN_H +#warning "This file is intended as an implementation detail of gpuintrin.h" #endif -#include - -#if !defined(__cplusplus) -_Pragma("push_macro(\"bool\")"); -#define bool _Bool +#ifndef __CUDA_ARCH__ +#define __CUDA_ARCH__ 0 #endif _Pragma("omp begin declare target device_type(nohost)"); @@ -37,10 +34,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})"); // Attribute to declare a function as a kernel. #define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected"))) -// Defined in gpuintrin.h, used later in this file. -_DEFAULT_FN_ATTRS static __inline__ uint64_t -__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x); - // Returns the number of CUDA blocks in the 'x' dimension. _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) { return __nvvm_read_ptx_sreg_nctaid_x(); @@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) { _Pragma("omp end declare variant"); _Pragma("omp end declare target"); -#if !defined(__cplusplus) -_Pragma("pop_macro(\"bool\")"); -#endif - #endif // __NVPTXINTRIN_H `` https://github.com/llvm/llvm-project/pull/131134 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits