llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Joseph Huber (jhuber6)

<details>
<summary>Changes</summary>

Summary:
This module flag is intended to be used by the target library info to
perform certain optimizations on string functions. OpenCL supports
neither wchar nor these string functions, so the flag has no use there.
Currently, it needs to be suppressed for OpenCL library builds
because of compatibility issues with MSVC headers. Simply do not emit
it for OpenCL for now.

Perhaps we can change this to not be a module flag in the future, but
I'm assuming it is a module flag because compiler settings can modify
the wchar size without that being a part of the target triple.


---

Patch is 68.47 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/174454.diff


5 Files Affected:

- (modified) clang/lib/CodeGen/CodeGenModule.cpp (+2-1) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl (+19-20) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl (+160-162) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gws-insts.cl (+1-1) 
- (modified) clang/test/CodeGenOpenCL/cl20-device-side-enqueue-attributes.cl 
(+14-16) 


``````````diff
diff --git a/clang/lib/CodeGen/CodeGenModule.cpp 
b/clang/lib/CodeGen/CodeGenModule.cpp
index 85ed38f144627..aad6118a24a02 100644
--- a/clang/lib/CodeGen/CodeGenModule.cpp
+++ b/clang/lib/CodeGen/CodeGenModule.cpp
@@ -1164,7 +1164,8 @@ void CodeGenModule::Release() {
   // TargetLibraryInfo.
   uint64_t WCharWidth =
       Context.getTypeSizeInChars(Context.getWideCharType()).getQuantity();
-  getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
+  if (!LangOpts.OpenCL)
+    getModule().addModuleFlag(llvm::Module::Error, "wchar_size", WCharWidth);
 
   if (getTriple().isOSzOS()) {
     getModule().addModuleFlag(llvm::Module::Warning,
diff --git a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl 
b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
index 0ca247838f76e..23215d13f746b 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-cluster-dims.cl
@@ -4,25 +4,25 @@
 kernel void foo(global int *p) { *p = 1; }
 // CHECK: Function Attrs: convergent norecurse nounwind
 // CHECK-LABEL: define dso_local amdgpu_kernel void @foo(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] 
!kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual 
[[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type 
[[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR0:[0-9]+]] 
!kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual 
[[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type 
[[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // CHECK-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[P_ADDR]] to ptr
-// CHECK-NEXT:    store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 
8, !tbaa [[INTPTR_TBAA11:![0-9]+]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], 
align 8, !tbaa [[INTPTR_TBAA11]]
+// CHECK-NEXT:    store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 
8, !tbaa [[INTPTR_TBAA10:![0-9]+]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], 
align 8, !tbaa [[INTPTR_TBAA10]]
 // CHECK-NEXT:    call void @__clang_ocl_kern_imp_foo(ptr addrspace(1) noundef 
align 4 [[TMP0]]) #[[ATTR2:[0-9]+]]
 // CHECK-NEXT:    ret void
 //
 //
 // CHECK: Function Attrs: alwaysinline convergent norecurse nounwind
 // CHECK-LABEL: define dso_local void @__clang_ocl_kern_imp_foo(
-// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] 
!kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] 
!kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] 
!kernel_arg_type_qual [[META10]] {
+// CHECK-SAME: ptr addrspace(1) noundef align 4 [[P:%.*]]) #[[ATTR1:[0-9]+]] 
!kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] 
!kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] 
!kernel_arg_type_qual [[META9]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
 // CHECK-NEXT:    [[P_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // CHECK-NEXT:    [[P_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[P_ADDR]] to ptr
-// CHECK-NEXT:    store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 
8, !tbaa [[INTPTR_TBAA11]]
-// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], 
align 8, !tbaa [[INTPTR_TBAA11]]
-// CHECK-NEXT:    store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa 
[[INT_TBAA3:![0-9]+]]
+// CHECK-NEXT:    store ptr addrspace(1) [[P]], ptr [[P_ADDR_ASCAST]], align 
8, !tbaa [[INTPTR_TBAA10]]
+// CHECK-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr [[P_ADDR_ASCAST]], 
align 8, !tbaa [[INTPTR_TBAA10]]
+// CHECK-NEXT:    store i32 1, ptr addrspace(1) [[TMP0]], align 4, !tbaa 
[[INT_TBAA2:![0-9]+]]
 // CHECK-NEXT:    ret void
 //
 //.
@@ -31,17 +31,16 @@ kernel void foo(global int *p) { *p = 1; }
 // CHECK: attributes #[[ATTR2]] = { convergent nounwind }
 //.
 // CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 600}
-// CHECK: [[META1:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
-// CHECK: [[META2:![0-9]+]] = !{i32 2, i32 0}
-// CHECK: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
-// CHECK: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
-// CHECK: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
-// CHECK: [[META6]] = !{!"Simple C/C++ TBAA"}
-// CHECK: [[META7]] = !{i32 1}
-// CHECK: [[META8]] = !{!"none"}
-// CHECK: [[META9]] = !{!"int*"}
-// CHECK: [[META10]] = !{!""}
-// CHECK: [[INTPTR_TBAA11]] = !{[[META12:![0-9]+]], [[META12]], i64 0}
-// CHECK: [[META12]] = !{!"p1 int", [[META13:![0-9]+]], i64 0}
-// CHECK: [[META13]] = !{!"any pointer", [[META5]], i64 0}
+// CHECK: [[META1:![0-9]+]] = !{i32 2, i32 0}
+// CHECK: [[INT_TBAA2]] = !{[[META3:![0-9]+]], [[META3]], i64 0}
+// CHECK: [[META3]] = !{!"int", [[META4:![0-9]+]], i64 0}
+// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0}
+// CHECK: [[META5]] = !{!"Simple C/C++ TBAA"}
+// CHECK: [[META6]] = !{i32 1}
+// CHECK: [[META7]] = !{!"none"}
+// CHECK: [[META8]] = !{!"int*"}
+// CHECK: [[META9]] = !{!""}
+// CHECK: [[INTPTR_TBAA10]] = !{[[META11:![0-9]+]], [[META11]], i64 0}
+// CHECK: [[META11]] = !{!"p1 int", [[META12:![0-9]+]], i64 0}
+// CHECK: [[META12]] = !{!"any pointer", [[META4]], i64 0}
 //.
diff --git a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl 
b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
index 2cbc9787a04b0..d16d4f65930a6 100644
--- a/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
+++ b/clang/test/CodeGenOpenCL/amdgpu-enqueue-kernel.cl
@@ -89,7 +89,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
 // NOCPU-LABEL: define dso_local amdgpu_kernel void @test(
-// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META3:![0-9]+]] 
!kernel_arg_access_qual [[META4:![0-9]+]] !kernel_arg_type [[META5:![0-9]+]] 
!kernel_arg_base_type [[META5]] !kernel_arg_type_qual [[META6:![0-9]+]] {
+// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META2:![0-9]+]] 
!kernel_arg_access_qual [[META3:![0-9]+]] !kernel_arg_type [[META4:![0-9]+]] 
!kernel_arg_base_type [[META4]] !kernel_arg_type_qual [[META5:![0-9]+]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // NOCPU-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
@@ -113,7 +113,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
 // NOCPU-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
-// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META3]] !kernel_arg_access_qual 
[[META4]] !kernel_arg_type [[META5]] !kernel_arg_base_type [[META5]] 
!kernel_arg_type_qual [[META6]] {
+// NOCPU-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR3:[0-9]+]] !kernel_arg_addr_space [[META2]] !kernel_arg_access_qual 
[[META3]] !kernel_arg_type [[META4]] !kernel_arg_base_type [[META4]] 
!kernel_arg_type_qual [[META5]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // NOCPU-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
@@ -235,7 +235,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
 // NOCPU-LABEL: define dso_local amdgpu_kernel void 
@test_target_features_kernel(
-// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] 
!kernel_arg_addr_space [[META7:![0-9]+]] !kernel_arg_access_qual 
[[META8:![0-9]+]] !kernel_arg_type [[META9:![0-9]+]] !kernel_arg_base_type 
[[META9]] !kernel_arg_type_qual [[META10:![0-9]+]] {
+// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR4:[0-9]+]] 
!kernel_arg_addr_space [[META6:![0-9]+]] !kernel_arg_access_qual 
[[META7:![0-9]+]] !kernel_arg_type [[META8:![0-9]+]] !kernel_arg_base_type 
[[META8]] !kernel_arg_type_qual [[META9:![0-9]+]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // NOCPU-NEXT:    [[I_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I_ADDR]] to ptr
@@ -247,7 +247,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent noinline norecurse nounwind optnone
 // NOCPU-LABEL: define dso_local void 
@__clang_ocl_kern_imp_test_target_features_kernel(
-// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] 
!kernel_arg_addr_space [[META7]] !kernel_arg_access_qual [[META8]] 
!kernel_arg_type [[META9]] !kernel_arg_base_type [[META9]] 
!kernel_arg_type_qual [[META10]] {
+// NOCPU-SAME: ptr addrspace(1) noundef align 4 [[I:%.*]]) #[[ATTR5:[0-9]+]] 
!kernel_arg_addr_space [[META6]] !kernel_arg_access_qual [[META7]] 
!kernel_arg_type [[META8]] !kernel_arg_base_type [[META8]] 
!kernel_arg_type_qual [[META9]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[I_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // NOCPU-NEXT:    [[DEFAULT_QUEUE:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
@@ -287,7 +287,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent nounwind
 // NOCPU-LABEL: define internal amdgpu_kernel void @__test_block_invoke_kernel(
-// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) 
#[[ATTR8:[0-9]+]] !associated [[META11:![0-9]+]] !kernel_arg_addr_space 
[[META12:![0-9]+]] !kernel_arg_access_qual [[META8]] !kernel_arg_type 
[[META13:![0-9]+]] !kernel_arg_base_type [[META13]] !kernel_arg_type_qual 
[[META10]] {
+// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0:%.*]]) 
#[[ATTR8:[0-9]+]] !associated [[META10:![0-9]+]] !kernel_arg_addr_space 
[[META11:![0-9]+]] !kernel_arg_access_qual [[META7]] !kernel_arg_type 
[[META12:![0-9]+]] !kernel_arg_base_type [[META12]] !kernel_arg_type_qual 
[[META9]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), i8 
}>, align 8, addrspace(5)
 // NOCPU-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), i8 }> [[TMP0]], 
ptr addrspace(5) [[TMP1]], align 8
@@ -323,7 +323,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent nounwind
 // NOCPU-LABEL: define internal amdgpu_kernel void 
@__test_block_invoke_2_kernel(
-// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META14:![0-9]+]] 
!kernel_arg_addr_space [[META12]] !kernel_arg_access_qual [[META8]] 
!kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] 
!kernel_arg_type_qual [[META10]] {
+// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]]) #[[ATTR8]] !associated [[META13:![0-9]+]] 
!kernel_arg_addr_space [[META11]] !kernel_arg_access_qual [[META7]] 
!kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] 
!kernel_arg_type_qual [[META9]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), 
ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
 // NOCPU-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), 
i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP1]], align 8
@@ -365,7 +365,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent nounwind
 // NOCPU-LABEL: define internal amdgpu_kernel void 
@__test_block_invoke_3_kernel(
-// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated 
[[META15:![0-9]+]] !kernel_arg_addr_space [[META16:![0-9]+]] 
!kernel_arg_access_qual [[META17:![0-9]+]] !kernel_arg_type [[META18:![0-9]+]] 
!kernel_arg_base_type [[META18]] !kernel_arg_type_qual [[META19:![0-9]+]] {
+// NOCPU-SAME: <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), i64, i8 
}> [[TMP0:%.*]], ptr addrspace(3) [[TMP1:%.*]]) #[[ATTR8]] !associated 
[[META14:![0-9]+]] !kernel_arg_addr_space [[META15:![0-9]+]] 
!kernel_arg_access_qual [[META16:![0-9]+]] !kernel_arg_type [[META17:![0-9]+]] 
!kernel_arg_base_type [[META17]] !kernel_arg_type_qual [[META18:![0-9]+]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[TMP2:%.*]] = alloca <{ i32, i32, ptr, ptr addrspace(1), 
ptr addrspace(1), i64, i8 }>, align 8, addrspace(5)
 // NOCPU-NEXT:    store <{ i32, i32, ptr, ptr addrspace(1), ptr addrspace(1), 
i64, i8 }> [[TMP0]], ptr addrspace(5) [[TMP2]], align 8
@@ -394,7 +394,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent nounwind
 // NOCPU-LABEL: define internal amdgpu_kernel void 
@__test_block_invoke_4_kernel(
-// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) 
#[[ATTR8]] !associated [[META20:![0-9]+]] !kernel_arg_addr_space [[META12]] 
!kernel_arg_access_qual [[META8]] !kernel_arg_type [[META13]] 
!kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META10]] {
+// NOCPU-SAME: <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0:%.*]]) 
#[[ATTR8]] !associated [[META19:![0-9]+]] !kernel_arg_addr_space [[META11]] 
!kernel_arg_access_qual [[META7]] !kernel_arg_type [[META12]] 
!kernel_arg_base_type [[META12]] !kernel_arg_type_qual [[META9]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[TMP1:%.*]] = alloca <{ i32, i32, ptr, i64, ptr 
addrspace(1) }>, align 8, addrspace(5)
 // NOCPU-NEXT:    store <{ i32, i32, ptr, i64, ptr addrspace(1) }> [[TMP0]], 
ptr addrspace(5) [[TMP1]], align 8
@@ -419,7 +419,7 @@ kernel void test_target_features_kernel(global int *i) {
 //
 // NOCPU: Function Attrs: convergent nounwind
 // NOCPU-LABEL: define internal amdgpu_kernel void 
@__test_target_features_kernel_block_invoke_kernel(
-// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated 
[[META21:![0-9]+]] !kernel_arg_addr_space [[META12]] !kernel_arg_access_qual 
[[META8]] !kernel_arg_type [[META13]] !kernel_arg_base_type [[META13]] 
!kernel_arg_type_qual [[META10]] {
+// NOCPU-SAME: { i32, i32, ptr } [[TMP0:%.*]]) #[[ATTR8]] !associated 
[[META20:![0-9]+]] !kernel_arg_addr_space [[META11]] !kernel_arg_access_qual 
[[META7]] !kernel_arg_type [[META12]] !kernel_arg_base_type [[META12]] 
!kernel_arg_type_qual [[META9]] {
 // NOCPU-NEXT:  [[ENTRY:.*:]]
 // NOCPU-NEXT:    [[TMP1:%.*]] = alloca { i32, i32, ptr }, align 8, 
addrspace(5)
 // NOCPU-NEXT:    store { i32, i32, ptr } [[TMP0]], ptr addrspace(5) [[TMP1]], 
align 8
@@ -451,19 +451,19 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[OUT_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // GFX900-NEXT:    [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[ID_ADDR]] to ptr
 // GFX900-NEXT:    [[OUT_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[OUT_ADDR]] to ptr
-// GFX900-NEXT:    store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa 
[[LONG_TBAA7:![0-9]+]]
-// GFX900-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], 
align 8, !tbaa [[LONGPTR_TBAA9:![0-9]+]]
-// GFX900-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, 
!tbaa [[LONG_TBAA7]]
-// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]]
-// GFX900-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, 
!tbaa [[LONG_TBAA7]]
+// GFX900-NEXT:    store i64 [[ID]], ptr [[ID_ADDR_ASCAST]], align 8, !tbaa 
[[LONG_TBAA6:![0-9]+]]
+// GFX900-NEXT:    store ptr addrspace(1) [[OUT]], ptr [[OUT_ADDR_ASCAST]], 
align 8, !tbaa [[LONGPTR_TBAA8:![0-9]+]]
+// GFX900-NEXT:    [[TMP0:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, 
!tbaa [[LONG_TBAA6]]
+// GFX900-NEXT:    [[TMP1:%.*]] = load ptr addrspace(1), ptr 
[[OUT_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA8]]
+// GFX900-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ID_ADDR_ASCAST]], align 8, 
!tbaa [[LONG_TBAA6]]
 // GFX900-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr 
addrspace(1) [[TMP1]], i64 [[TMP2]]
-// GFX900-NEXT:    store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, 
!tbaa [[LONG_TBAA7]]
+// GFX900-NEXT:    store i64 [[TMP0]], ptr addrspace(1) [[ARRAYIDX]], align 8, 
!tbaa [[LONG_TBAA6]]
 // GFX900-NEXT:    ret void
 //
 //
 // GFX900: Function Attrs: convergent norecurse nounwind
 // GFX900-LABEL: define dso_local amdgpu_kernel void @test(
-// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META12:![0-9]+]] 
!kernel_arg_access_qual [[META13:![0-9]+]] !kernel_arg_type [[META14:![0-9]+]] 
!kernel_arg_base_type [[META14]] !kernel_arg_type_qual [[META15:![0-9]+]] {
+// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef 
[[B:%.*]], ptr addrspace(1) noundef align 8 [[C:%.*]], i64 noundef [[D:%.*]]) 
#[[ATTR2:[0-9]+]] !kernel_arg_addr_space [[META11:![0-9]+]] 
!kernel_arg_access_qual [[META12:![0-9]+]] !kernel_arg_type [[META13:![0-9]+]] 
!kernel_arg_base_type [[META13]] !kernel_arg_type_qual [[META14:![0-9]+]] {
 // GFX900-NEXT:  [[ENTRY:.*:]]
 // GFX900-NEXT:    [[A_ADDR:%.*]] = alloca ptr addrspace(1), align 8, 
addrspace(5)
 // GFX900-NEXT:    [[B_ADDR:%.*]] = alloca i8, align 1, addrspace(5)
@@ -473,21 +473,21 @@ kernel void test_target_features_kernel(global int *i) {
 // GFX900-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[B_ADDR]] to ptr
 // GFX900-NEXT:    [[C_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[C_ADDR]] to ptr
 // GFX900-NEXT:    [[D_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[D_ADDR]] to ptr
-// GFX900-NEXT:    store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 
8, !tbaa [[CHARPTR_TBAA16:![0-9]+]]
-// GFX900-NEXT:    store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa 
[[CHAR_TBAA18:![0-9]+]]
-// GFX900-NEXT:    store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 
8, !tbaa [[LONGPTR_TBAA9]]
-// GFX900-NEXT:    store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa 
[[LONG_TBAA7]]
-// GFX900-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA16]]
-// GFX900-NEXT:    [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, 
!tbaa [[CHAR_TBAA18]]
-// GFX900-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr 
[[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA9]]
-// GFX900-NEXT:    [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, 
!tbaa [[LONG_TBAA7]]
+// GFX900-NEXT:    store ptr addrspace(1) [[A]], ptr [[A_ADDR_ASCAST]], align 
8, !tbaa [[CHARPTR_TBAA15:![0-9]+]]
+// GFX900-NEXT:    store i8 [[B]], ptr [[B_ADDR_ASCAST]], align 1, !tbaa 
[[CHAR_TBAA17:![0-9]+]]
+// GFX900-NEXT:    store ptr addrspace(1) [[C]], ptr [[C_ADDR_ASCAST]], align 
8, !tbaa [[LONGPTR_TBAA8]]
+// GFX900-NEXT:    store i64 [[D]], ptr [[D_ADDR_ASCAST]], align 8, !tbaa 
[[LONG_TBAA6]]
+// GFX900-NEXT:    [[TMP0:%.*]] = load ptr addrspace(1), ptr 
[[A_ADDR_ASCAST]], align 8, !tbaa [[CHARPTR_TBAA15]]
+// GFX900-NEXT:    [[TMP1:%.*]] = load i8, ptr [[B_ADDR_ASCAST]], align 1, 
!tbaa [[CHAR_TBAA17]]
+// GFX900-NEXT:    [[TMP2:%.*]] = load ptr addrspace(1), ptr 
[[C_ADDR_ASCAST]], align 8, !tbaa [[LONGPTR_TBAA8]]
+// GFX900-NEXT:    [[TMP3:%.*]] = load i64, ptr [[D_ADDR_ASCAST]], align 8, 
!tbaa [[LONG_TBAA6]]
 // GFX900-NEXT:    call void @__clang_ocl_kern_imp_test(ptr addrspace(1) 
noundef align 1 [[TMP0]], i8 noundef signext [[TMP1]], ptr addrspace(1) noundef 
align 8 [[TMP2]], i64 noundef [[TMP3]]) #[[ATTR8:[0-9]+]]
 // GFX900-NEXT:    ret void
 //
 //
 // GFX900: Function Attrs: alwaysinline convergent norecurse nounwind
 // GFX900-LABEL: define dso_local void @__clang_ocl_kern_imp_test(
-// GFX900-SAME: ptr addrspace(1) noundef align 1 [[A:%.*]], i8 noundef signext 
[[B:%.*]], ...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/174454
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to