[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)
JonChesterfield wrote: Yep. I'm looking at changing their implementation and want a before-and-after shot in the git diff for the upcoming review. If that doesn't pan out, it's still good to get a heads-up if the codegen for these changes on us unexpectedly. https://github.com/llvm/llvm-project/pull/130956 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)
https://github.com/JonChesterfield closed https://github.com/llvm/llvm-project/pull/130956 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)
https://github.com/jhuber6 approved this pull request. Fine way to show the intrinsics that this generates. I originally kept it brief since I just assumed those were tested elsewhere. https://github.com/llvm/llvm-project/pull/130956 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)
llvmbot wrote: @llvm/pr-subscribers-libc Author: Jon Chesterfield (JonChesterfield) Changes Regenerated existing test case with include-generated-funcs to show the lowered IR for each instantiation. --- Patch is 46.11 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/130956.diff 1 Files Affected: - (modified) clang/test/Headers/gpuintrin.c (+808-63) ```diff diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c index 89efe12ee8def..30aa6f147ba03 100644 --- a/clang/test/Headers/gpuintrin.c +++ b/clang/test/Headers/gpuintrin.c @@ -1,10 +1,10 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ // RUN: -internal-isystem %S/../../lib/Headers/ \ // RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefix=AMDGPU // -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ // RUN: -internal-isystem %S/../../lib/Headers/ \ // RUN: -target-feature +ptx62 \ // RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \ @@ -12,6 +12,35 @@ #include <gpuintrin.h> +__gpu_kernel void foo() { + __gpu_num_blocks_x(); + __gpu_num_blocks_y(); + __gpu_num_blocks_z(); + __gpu_num_blocks(0); + __gpu_block_id_x(); + __gpu_block_id_y(); + __gpu_block_id_z(); + __gpu_block_id(0); + __gpu_num_threads_x(); + __gpu_num_threads_y(); + __gpu_num_threads_z(); + __gpu_num_threads(0); + __gpu_thread_id_x(); + __gpu_thread_id_y(); + __gpu_thread_id_z(); + __gpu_thread_id(0); + __gpu_num_lanes(); + __gpu_lane_id(); + __gpu_lane_mask(); + __gpu_read_first_lane_u32(-1, -1); + __gpu_ballot(-1, 1); + __gpu_sync_threads(); + __gpu_sync_lane(-1); + __gpu_shuffle_idx_u32(-1, -1, -1, 0); 
+ __gpu_first_lane_id(-1); + __gpu_is_first_in_lane(-1); + __gpu_exit(); +} // AMDGPU-LABEL: define protected amdgpu_kernel void @foo( // AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] { // AMDGPU-NEXT: [[ENTRY:.*:]] @@ -44,52 +73,244 @@ // AMDGPU-NEXT:call void @__gpu_exit() #[[ATTR8:[0-9]+]] // AMDGPU-NEXT:unreachable // -// NVPTX-LABEL: define protected ptx_kernel void @foo( -// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] { -// NVPTX-NEXT: [[ENTRY:.*:]] -// NVPTX-NEXT:[[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]] -// NVPTX-NEXT:[[CALL1:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL2:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL3:%.*]] = call i32 @__gpu_num_blocks(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL4:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR6]] -// NVPTX-NEXT:[[CALL5:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL6:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL7:%.*]] = call i32 @__gpu_block_id(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL8:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR6]] -// NVPTX-NEXT:[[CALL9:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL10:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL11:%.*]] = call i32 @__gpu_num_threads(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL12:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]] -// NVPTX-NEXT:[[CALL13:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL14:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL15:%.*]] = call i32 @__gpu_thread_id(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL16:%.*]] = call i32 @__gpu_num_lanes() #[[ATTR6]] -// NVPTX-NEXT:[[CALL17:%.*]] = call i32 @__gpu_lane_id() #[[ATTR6]] -// NVPTX-NEXT:[[CALL18:%.*]] = call i64 @__gpu_lane_mask() #[[ATTR6]] -// NVPTX-NEXT:[[CALL19:%.*]] = call i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1) #[[ATTR6]] -// 
NVPTX-NEXT:[[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR6]] -// NVPTX-NEXT:call void @__gpu_sync_threads() #[[ATTR6]] -// NVPTX-NEXT:call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:[[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:[[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:call void @__gpu_exit() #[[ATTR7:[0-9]+]] -// NVPTX-NEXT:unreachable // -__gpu_kernel void foo() { - __gpu_num_blocks_x(); - __gpu_num_blocks_y(); - __gpu_num_blocks_z(); - __gpu_num_blocks(0); - __gpu_block_id_x(); - __gpu_bl
[clang] [libc][nfc] Include instantiations of gpuintrin.h in IR test case (PR #130956)
https://github.com/JonChesterfield created https://github.com/llvm/llvm-project/pull/130956 Regenerated existing test case with include-generated-funcs to show the lowered IR for each instantiation. From 4ec726e4fcf5ab0b03f3942e42a4dbde1a6f43a4 Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Wed, 12 Mar 2025 13:03:05 + Subject: [PATCH] [libc][nfc] Include instantiations of gpuintrin.h in IR test case --- clang/test/Headers/gpuintrin.c | 871 ++--- 1 file changed, 808 insertions(+), 63 deletions(-) diff --git a/clang/test/Headers/gpuintrin.c b/clang/test/Headers/gpuintrin.c index 89efe12ee8def..30aa6f147ba03 100644 --- a/clang/test/Headers/gpuintrin.c +++ b/clang/test/Headers/gpuintrin.c @@ -1,10 +1,10 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --include-generated-funcs --version 5 +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ // RUN: -internal-isystem %S/../../lib/Headers/ \ // RUN: -triple amdgcn-amd-amdhsa -emit-llvm %s -o - \ // RUN: | FileCheck %s --check-prefix=AMDGPU // -// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ +// RUN: %clang_cc1 -internal-isystem %S/Inputs/include \ // RUN: -internal-isystem %S/../../lib/Headers/ \ // RUN: -target-feature +ptx62 \ // RUN: -triple nvptx64-nvidia-cuda -emit-llvm %s -o - \ @@ -12,6 +12,35 @@ #include <gpuintrin.h> +__gpu_kernel void foo() { + __gpu_num_blocks_x(); + __gpu_num_blocks_y(); + __gpu_num_blocks_z(); + __gpu_num_blocks(0); + __gpu_block_id_x(); + __gpu_block_id_y(); + __gpu_block_id_z(); + __gpu_block_id(0); + __gpu_num_threads_x(); + __gpu_num_threads_y(); + __gpu_num_threads_z(); + __gpu_num_threads(0); + __gpu_thread_id_x(); + __gpu_thread_id_y(); + __gpu_thread_id_z(); + __gpu_thread_id(0); + __gpu_num_lanes(); + __gpu_lane_id(); + __gpu_lane_mask(); + __gpu_read_first_lane_u32(-1, -1); + 
__gpu_ballot(-1, 1); + __gpu_sync_threads(); + __gpu_sync_lane(-1); + __gpu_shuffle_idx_u32(-1, -1, -1, 0); + __gpu_first_lane_id(-1); + __gpu_is_first_in_lane(-1); + __gpu_exit(); +} // AMDGPU-LABEL: define protected amdgpu_kernel void @foo( // AMDGPU-SAME: ) #[[ATTR0:[0-9]+]] { // AMDGPU-NEXT: [[ENTRY:.*:]] @@ -44,52 +73,244 @@ // AMDGPU-NEXT:call void @__gpu_exit() #[[ATTR8:[0-9]+]] // AMDGPU-NEXT:unreachable // -// NVPTX-LABEL: define protected ptx_kernel void @foo( -// NVPTX-SAME: ) #[[ATTR0:[0-9]+]] { -// NVPTX-NEXT: [[ENTRY:.*:]] -// NVPTX-NEXT:[[CALL:%.*]] = call i32 @__gpu_num_blocks_x() #[[ATTR6:[0-9]+]] -// NVPTX-NEXT:[[CALL1:%.*]] = call i32 @__gpu_num_blocks_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL2:%.*]] = call i32 @__gpu_num_blocks_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL3:%.*]] = call i32 @__gpu_num_blocks(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL4:%.*]] = call i32 @__gpu_block_id_x() #[[ATTR6]] -// NVPTX-NEXT:[[CALL5:%.*]] = call i32 @__gpu_block_id_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL6:%.*]] = call i32 @__gpu_block_id_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL7:%.*]] = call i32 @__gpu_block_id(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL8:%.*]] = call i32 @__gpu_num_threads_x() #[[ATTR6]] -// NVPTX-NEXT:[[CALL9:%.*]] = call i32 @__gpu_num_threads_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL10:%.*]] = call i32 @__gpu_num_threads_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL11:%.*]] = call i32 @__gpu_num_threads(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL12:%.*]] = call i32 @__gpu_thread_id_x() #[[ATTR6]] -// NVPTX-NEXT:[[CALL13:%.*]] = call i32 @__gpu_thread_id_y() #[[ATTR6]] -// NVPTX-NEXT:[[CALL14:%.*]] = call i32 @__gpu_thread_id_z() #[[ATTR6]] -// NVPTX-NEXT:[[CALL15:%.*]] = call i32 @__gpu_thread_id(i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL16:%.*]] = call i32 @__gpu_num_lanes() #[[ATTR6]] -// NVPTX-NEXT:[[CALL17:%.*]] = call i32 @__gpu_lane_id() #[[ATTR6]] -// NVPTX-NEXT:[[CALL18:%.*]] = call i64 @__gpu_lane_mask() #[[ATTR6]] -// NVPTX-NEXT:[[CALL19:%.*]] 
= call i32 @__gpu_read_first_lane_u32(i64 noundef -1, i32 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:[[CALL20:%.*]] = call i64 @__gpu_ballot(i64 noundef -1, i1 noundef zeroext true) #[[ATTR6]] -// NVPTX-NEXT:call void @__gpu_sync_threads() #[[ATTR6]] -// NVPTX-NEXT:call void @__gpu_sync_lane(i64 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:[[CALL21:%.*]] = call i32 @__gpu_shuffle_idx_u32(i64 noundef -1, i32 noundef -1, i32 noundef -1, i32 noundef 0) #[[ATTR6]] -// NVPTX-NEXT:[[CALL22:%.*]] = call i64 @__gpu_first_lane_id(i64 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:[[CALL23:%.*]] = call zeroext i1 @__gpu_is_first_in_lane(i64 noundef -1) #[[ATTR6]] -// NVPTX-NEXT:call void @__gpu_exit() #[[ATTR7:[0-9]+]] -// NVPTX-NEXT:unreachable // -__gpu_kernel void foo() { - __gpu