https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/131134
Adds macro guards to warn if the implementation headers are included directly, as part of dropping the need for them to be standalone. I'd like to declare functions before the include, but it is not clear that is viable with the openmp pragma annotation.

From f0149fdf6d8fcf60b128bef8aacf299e846cc4a8 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield <jonathanchesterfi...@gmail.com>
Date: Thu, 13 Mar 2025 12:49:42 +0000
Subject: [PATCH] [libc][nfc] Steps to allow sharing code between gpu intrin.h headers

---
 clang/lib/Headers/amdgpuintrin.h | 15 ++-------------
 clang/lib/Headers/gpuintrin.h    | 20 ++++++++++++++------
 clang/lib/Headers/nvptxintrin.h  | 19 ++++---------------
 3 files changed, 20 insertions(+), 34 deletions(-)

diff --git a/clang/lib/Headers/amdgpuintrin.h b/clang/lib/Headers/amdgpuintrin.h
index 839a05175cf3e..7b1d16f8ca88d 100644
--- a/clang/lib/Headers/amdgpuintrin.h
+++ b/clang/lib/Headers/amdgpuintrin.h
@@ -13,11 +13,8 @@
 #error "This file is intended for AMDGPU targets or offloading to AMDGPU"
 #endif

-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __GPUINTRIN_H
+#warning "This file is intended as an implementation detail of gpuintrin.h"
 #endif

 _Pragma("omp begin declare target device_type(nohost)");
@@ -33,10 +30,6 @@ _Pragma("omp begin declare variant match(device = {arch(amdgcn)})");
 // Attribute to declare a function as a kernel.
 #define __gpu_kernel __attribute__((amdgpu_kernel, visibility("protected")))

-// Defined in gpuintrin.h, used later in this file.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
-
 // Returns the number of workgroups in the 'x' dimension of the grid.
 _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
   return __builtin_amdgcn_grid_size_x() / __builtin_amdgcn_workgroup_size_x();
@@ -238,8 +231,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
 _Pragma("omp end declare variant");
 _Pragma("omp end declare target");

-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
 #endif // __AMDGPUINTRIN_H
diff --git a/clang/lib/Headers/gpuintrin.h b/clang/lib/Headers/gpuintrin.h
index 4181628d18048..8d300b5b9acb8 100644
--- a/clang/lib/Headers/gpuintrin.h
+++ b/clang/lib/Headers/gpuintrin.h
@@ -25,6 +25,20 @@
 #endif
 #endif

+#include <stdint.h>
+
+#if !defined(__cplusplus)
+_Pragma("push_macro(\"bool\")");
+#define bool _Bool
+#endif
+
+// Declare functions that can be called by the implementation headers
+
+// Returns the number of workgroups in the 'x' dimension of the grid.
+_DEFAULT_FN_ATTRS static __inline__ uint64_t
+__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
+
+
 #if defined(__NVPTX__)
 #include <nvptxintrin.h>
 #elif defined(__AMDGPU__)
@@ -33,12 +47,6 @@
 #error "This header is only meant to be used on GPU architectures."
 #endif

-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
-#endif

 _Pragma("omp begin declare target device_type(nohost)");
 _Pragma("omp begin declare variant match(device = {kind(gpu)})");
diff --git a/clang/lib/Headers/nvptxintrin.h b/clang/lib/Headers/nvptxintrin.h
index d00a5f6de3950..170c943fe63a2 100644
--- a/clang/lib/Headers/nvptxintrin.h
+++ b/clang/lib/Headers/nvptxintrin.h
@@ -13,15 +13,12 @@
 #error "This file is intended for NVPTX targets or offloading to NVPTX"
 #endif

-#ifndef __CUDA_ARCH__
-#define __CUDA_ARCH__ 0
+#ifndef __GPUINTRIN_H
+#warning "This file is intended as an implementation detail of gpuintrin.h"
 #endif

-#include <stdint.h>
-
-#if !defined(__cplusplus)
-_Pragma("push_macro(\"bool\")");
-#define bool _Bool
+#ifndef __CUDA_ARCH__
+#define __CUDA_ARCH__ 0
 #endif

 _Pragma("omp begin declare target device_type(nohost)");
@@ -37,10 +34,6 @@ _Pragma("omp begin declare variant match(device = {arch(nvptx64)})");
 // Attribute to declare a function as a kernel.
 #define __gpu_kernel __attribute__((nvptx_kernel, visibility("protected")))

-// Defined in gpuintrin.h, used later in this file.
-_DEFAULT_FN_ATTRS static __inline__ uint64_t
-__gpu_read_first_lane_u64(uint64_t __lane_mask, uint64_t __x);
-
 // Returns the number of CUDA blocks in the 'x' dimension.
 _DEFAULT_FN_ATTRS static __inline__ uint32_t __gpu_num_blocks_x(void) {
   return __nvvm_read_ptx_sreg_nctaid_x();
@@ -263,8 +256,4 @@ _DEFAULT_FN_ATTRS static __inline__ void __gpu_thread_suspend(void) {
 _Pragma("omp end declare variant");
 _Pragma("omp end declare target");

-#if !defined(__cplusplus)
-_Pragma("pop_macro(\"bool\")");
-#endif
-
 #endif // __NVPTXINTRIN_H
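As a quick illustration of the guard pattern the description above introduces (not part of the patch): the umbrella header owns the shared declarations and its include-guard macro, and each implementation header warns when that macro is absent, i.e. when it was included directly rather than through the umbrella. The sketch below uses invented names (my_gpuintrin.h, my_amdgpuintrin.h, __my_read_first_lane_u64) rather than the real clang headers, and a stub definition in place of target builtins.

/* my_gpuintrin.h -- hypothetical umbrella header. */
#ifndef __MY_GPUINTRIN_H
#define __MY_GPUINTRIN_H

#include <stdint.h>

/* Shared declarations sit here, before the target-specific include, so the
   implementation header can rely on them without being standalone. */
static inline uint64_t __my_read_first_lane_u64(uint64_t __lane_mask,
                                                uint64_t __x);

#include "my_amdgpuintrin.h"

#endif /* __MY_GPUINTRIN_H */

/* my_amdgpuintrin.h -- implementation detail of my_gpuintrin.h. */
#ifndef __MY_AMDGPUINTRIN_H
#define __MY_AMDGPUINTRIN_H

/* Including this header directly bypasses the shared declarations above,
   so diagnose it, mirroring the #warning added by the patch. */
#ifndef __MY_GPUINTRIN_H
#warning "This file is intended as an implementation detail of my_gpuintrin.h"
#endif

/* Definition of the function declared by the umbrella header; a real
   implementation header would use target builtins here. */
static inline uint64_t __my_read_first_lane_u64(uint64_t __lane_mask,
                                                uint64_t __x) {
  (void)__lane_mask; /* unused in this stub */
  return __x;
}

#endif /* __MY_AMDGPUINTRIN_H */

With that layout, #include "my_gpuintrin.h" compiles quietly, while #include "my_amdgpuintrin.h" on its own trips the #warning, which is the behaviour the guards in this patch aim for.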