[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)

2025-03-12 Thread Jon Chesterfield via cfe-commits

JonChesterfield wrote:

Yep. I'm looking at changing their implementation and want a before-and-after 
shot in the git diff for the upcoming review. Even if that doesn't pan out, it's 
still good to get a heads-up if the codegen for these intrinsics changes on us 
unexpectedly.

https://github.com/llvm/llvm-project/pull/130956
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)

2025-03-12 Thread Jon Chesterfield via cfe-commits

https://github.com/JonChesterfield closed 
https://github.com/llvm/llvm-project/pull/130956
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)

2025-03-12 Thread Joseph Huber via cfe-commits

https://github.com/jhuber6 approved this pull request.

Fine way to show the intrinsics that this generates. I originally kept it brief 
since I just assumed those were tested elsewhere.

https://github.com/llvm/llvm-project/pull/130956
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)

2025-03-12 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-libc

Author: Jon Chesterfield (JonChesterfield)


Changes

Regenerated the existing test case with --include-generated-funcs to show the 
lowered IR for each instantiation.

---

Patch is 46.11 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/130956.diff


1 Files Affected:

- (modified) clang/test/Headers/gpuintrin.c (+808-63) 


```diff
diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index 89efe12ee8def..30aa6f147ba03 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -1,10 +1,10 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
-// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --version 5
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include  \
 // RUN:   -internal-isystem %S/../../lib/Headers/ \
 // RUN:   -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \
 // RUN: | FileCheck %s --check-prefix=AMDGPU
 //
-// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include  \
 // RUN:   -internal-isystem %S/../../lib/Headers/ \
 // RUN:   -target-feature +ptx62 \
 // RUN:   -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \
@@ -12,6 +12,35 @@
 
 #include <gpuintrin.h>
 
+__gpu_kernel void foo() {
+  __gpu_num_blocks_x();
+  __gpu_num_blocks_y();
+  __gpu_num_blocks_z();
+  __gpu_num_blocks(0);
+  __gpu_block_id_x();
+  __gpu_block_id_y();
+  __gpu_block_id_z();
+  __gpu_block_id(0);
+  __gpu_num_threads_x();
+  __gpu_num_threads_y();
+  __gpu_num_threads_z();
+  __gpu_num_threads(0);
+  __gpu_thread_id_x();
+  __gpu_thread_id_y();
+  __gpu_thread_id_z();
+  __gpu_thread_id(0);
+  __gpu_num_lanes();
+  __gpu_lane_id();
+  __gpu_lane_mask();
+  __gpu_read_first_lane_u32(-1, -1);
+  __gpu_ballot(-1, 1);
+  __gpu_sync_threads();
+  __gpu_sync_lane(-1);
+  __gpu_shuffle_idx_u32(-1, -1, -1, 0);
+  __gpu_first_lane_id(-1);
+  __gpu_is_first_in_lane(-1);
+  __gpu_exit();
+}
 // AMDGPU-LABEL: define protected amdgpu_kernel void @foo(
 // AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] {
 // AMDGPU-NEXT:  [[ENTRY:.*:]]
@@ -44,52 +73,244 @@
 // AMDGPU-NEXT:call void @__gpu_exit() #[[ATTR8:[0-9]+]]
 // AMDGPU-NEXT:unreachable
 //
-// NVPTX-LABEL: define protected ptx_kernel void @foo(
-// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
-// NVPTX-NEXT:  [[ENTRY:.*:]]
-// NVPTX-NEXT:[[CALL:%.*]] = call i32 @__gpu_num_blocks_x() 
#[[ATTR6:[0-9]+]]
-// NVPTX-NEXT:[[CALL1:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL2:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL3:%.*]] = call i32 @__gpu_num_blocks(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL4:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL5:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL6:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL7:%.*]] = call i32 @__gpu_block_id(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL8:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL9:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL10:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL11:%.*]] = call i32 @__gpu_num_threads(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL12:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL13:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL14:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL15:%.*]] = call i32 @__gpu_thread_id(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL16:%.*]] = call i32 @__gpu_num_lanes() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL17:%.*]] = call i32 @__gpu_lane_id() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL18:%.*]] = call i64 @__gpu_lane_mask() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL19:%.*]] = call i32 @__gpu_read_first_lane_u32(i64 
noundef -1, i32 noundef -1) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 
noundef zeroext true) #[[ATTR6]]
-// NVPTX-NEXT:call void @__gpu_sync_threads() #[[ATTR6]]
-// NVPTX-NEXT:call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef 
-1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef 
-1) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 
noundef -1) #[[ATTR6]]
-// NVPTX-NEXT:call void @__gpu_exit() #[[ATTR7:[0-9]+]]
-// NVPTX-NEXT:unreachable
 //
-__gpu_kernel void foo() {
-  __gpu_num_blocks_x();
-  __gpu_num_blocks_y();
-  __gpu_num_blocks_z();
-  __gpu_num_blocks(0);
-  __gpu_block_id_x();
-  __gpu_bl

[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)

2025-03-12 Thread Jon Chesterfield via cfe-commits

https://github.com/JonChesterfield created 
https://github.com/llvm/llvm-project/pull/130956

Regenerated the existing test case with --include-generated-funcs to show the 
lowered IR for each instantiation.

>From 4ec726e4fcf5ab0b03f3942e42a4dbde1a6f43a4 Mon Sep 17 00:00:00 2001
From: Jon Chesterfield 
Date: Wed, 12 Mar 2025 13:03:05 +
Subject: [PATCH] [libc][nfc] Include instantiations of gpuintrin.h in IR test
 case

---
 clang/test/Headers/gpuintrin.c | 871 ++---
 1 file changed, 808 insertions(+), 63 deletions(-)

diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c
index 89efe12ee8def..30aa6f147ba03 100644
--- a/clang/test/Headers/gpuintrin.c
+++ b/clang/test/Headers/gpuintrin.c
@@ -1,10 +1,10 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
-// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --include-generated-funcs --version 5
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include  \
 // RUN:   -internal-isystem %S/../../lib/Headers/ \
 // RUN:   -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \
 // RUN: | FileCheck %s --check-prefix=AMDGPU
 //
-// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \
+// RUN: %clang_cc1 -internal-isystem %S/Inputs/include  \
 // RUN:   -internal-isystem %S/../../lib/Headers/ \
 // RUN:   -target-feature +ptx62 \
 // RUN:   -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \
@@ -12,6 +12,35 @@
 
 #include <gpuintrin.h>
 
+__gpu_kernel void foo() {
+  __gpu_num_blocks_x();
+  __gpu_num_blocks_y();
+  __gpu_num_blocks_z();
+  __gpu_num_blocks(0);
+  __gpu_block_id_x();
+  __gpu_block_id_y();
+  __gpu_block_id_z();
+  __gpu_block_id(0);
+  __gpu_num_threads_x();
+  __gpu_num_threads_y();
+  __gpu_num_threads_z();
+  __gpu_num_threads(0);
+  __gpu_thread_id_x();
+  __gpu_thread_id_y();
+  __gpu_thread_id_z();
+  __gpu_thread_id(0);
+  __gpu_num_lanes();
+  __gpu_lane_id();
+  __gpu_lane_mask();
+  __gpu_read_first_lane_u32(-1, -1);
+  __gpu_ballot(-1, 1);
+  __gpu_sync_threads();
+  __gpu_sync_lane(-1);
+  __gpu_shuffle_idx_u32(-1, -1, -1, 0);
+  __gpu_first_lane_id(-1);
+  __gpu_is_first_in_lane(-1);
+  __gpu_exit();
+}
 // AMDGPU-LABEL: define protected amdgpu_kernel void @foo(
 // AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] {
 // AMDGPU-NEXT:  [[ENTRY:.*:]]
@@ -44,52 +73,244 @@
 // AMDGPU-NEXT:call void @__gpu_exit() #[[ATTR8:[0-9]+]]
 // AMDGPU-NEXT:unreachable
 //
-// NVPTX-LABEL: define protected ptx_kernel void @foo(
-// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] {
-// NVPTX-NEXT:  [[ENTRY:.*:]]
-// NVPTX-NEXT:[[CALL:%.*]] = call i32 @__gpu_num_blocks_x() 
#[[ATTR6:[0-9]+]]
-// NVPTX-NEXT:[[CALL1:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL2:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL3:%.*]] = call i32 @__gpu_num_blocks(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL4:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL5:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL6:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL7:%.*]] = call i32 @__gpu_block_id(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL8:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL9:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL10:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL11:%.*]] = call i32 @__gpu_num_threads(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL12:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL13:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL14:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL15:%.*]] = call i32 @__gpu_thread_id(i32 noundef 0) 
#[[ATTR6]]
-// NVPTX-NEXT:[[CALL16:%.*]] = call i32 @__gpu_num_lanes() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL17:%.*]] = call i32 @__gpu_lane_id() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL18:%.*]] = call i64 @__gpu_lane_mask() #[[ATTR6]]
-// NVPTX-NEXT:[[CALL19:%.*]] = call i32 @__gpu_read_first_lane_u32(i64 
noundef -1, i32 noundef -1) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 
noundef zeroext true) #[[ATTR6]]
-// NVPTX-NEXT:call void @__gpu_sync_threads() #[[ATTR6]]
-// NVPTX-NEXT:call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef 
-1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef 
-1) #[[ATTR6]]
-// NVPTX-NEXT:[[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 
noundef -1) #[[ATTR6]]
-// NVPTX-NEXT:call void @__gpu_exit() #[[ATTR7:[0-9]+]]
-// NVPTX-NEXT:unreachable
 //
-__gpu_kernel void foo() {
-  __gpu