llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-codegen

Author: Jameson Nash (vtjnash)

<details>
<summary>Changes</summary>

Creating these address-space casts just added unnecessary work to the IR: the
temporaries are only ever used for load and store, so the casts amounted to IR
noise. Tests were updated by the UTC script to remove the extra lines.

---

Patch is 298.25 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/179327.diff


41 Files Affected:

- (modified) clang/lib/CodeGen/CGClass.cpp (+1-1) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+4-3) 
- (modified) clang/lib/CodeGen/CGExprScalar.cpp (+4-4) 
- (modified) clang/lib/CodeGen/CodeGenFunction.cpp (+1-1) 
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+6-6) 
- (modified) clang/lib/CodeGen/CodeGenPGO.cpp (+1-1) 
- (modified) clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c (-28) 
- (modified) clang/test/CodeGen/AMDGPU/full-bf16.c (-2) 
- (modified) clang/test/CodeGen/amdgpu-abi-version.c (-2) 
- (modified) clang/test/CodeGen/builtins-extended-image.c (-88) 
- (modified) clang/test/CodeGen/builtins-image-load.c (-84) 
- (modified) clang/test/CodeGen/scoped-atomic-ops.c (-102) 
- (modified) clang/test/CodeGen/target-addrspace.cpp (-4) 
- (modified) clang/test/CodeGenCUDA/amdgpu-bf16.cu (-4) 
- (modified) clang/test/CodeGenCUDA/spirv-amdgcn-bf16.cu (-4) 
- (modified) clang/test/CodeGenCXX/dynamic-cast-address-space.cpp (-4) 
- (modified) clang/test/CodeGenHIP/amdgpu-barrier-type.hip (-2) 
- (modified) clang/test/CodeGenHIP/builtins-make-buffer-rsrc.hip (-8) 
- (modified) clang/test/CodeGenHIP/hip_weak_alias.cpp (+18-10) 
- (modified) clang/test/CodeGenHIP/printf.cpp (-8) 
- (modified) clang/test/CodeGenHIP/printf_nonhostcall.cpp (-16) 
- (modified) clang/test/CodeGenHIP/sanitize-undefined-null.hip (-2) 
- (modified) clang/test/CodeGenHIP/spirv-amdgcn-ballot.cpp (-2) 
- (modified) clang/test/CodeGenOpenCL/addr-space-struct-arg.cl (+10-3) 
- (modified) clang/test/CodeGenOpenCL/amdgpu-abi-struct-arg-byref.cl (+1-3) 
- (modified) clang/test/CodeGenOpenCL/atomic-ops.cl (-1) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl (-4) 
- (modified) clang/test/CodeGenOpenCL/builtins-amdgcn-gfx1250.cl (-14) 
- (modified) clang/test/CodeGenOpenCL/check-atomic-alignment.cl (-2) 
- (modified) clang/test/CodeGenSYCL/function-attrs.cpp (+6-8) 
- (modified) clang/test/Headers/__clang_hip_libdevice_declares.cpp (+7-11) 
- (modified) clang/test/Headers/amdgcn_openmp_device_math.c (+107-163) 
- (modified) clang/test/Headers/amdgcn_openmp_device_math_c.c (-6) 
- (modified) clang/test/Headers/amdgcn_openmp_device_math_constexpr.cpp (-32) 
- (modified) clang/test/Headers/gpu_disabled_math.cpp (-2) 
- (modified) clang/test/Headers/gpuintrin.c (+26-66) 
- (modified) clang/test/Headers/gpuintrin_lang.c (-12) 
- (modified) clang/test/Headers/openmp-device-functions-bool.c (+4-8) 
- (modified) clang/test/Headers/openmp_new_nothrow.cpp (-6) 
- (modified) clang/test/OpenMP/amdgcn_weak_alias.c (+26-4) 
- (modified) clang/test/OpenMP/amdgcn_weak_alias.cpp (+20-6) 


``````````diff
diff --git a/clang/lib/CodeGen/CGClass.cpp b/clang/lib/CodeGen/CGClass.cpp
index 02e8912bdbbf0..81ca858d29512 100644
--- a/clang/lib/CodeGen/CGClass.cpp
+++ b/clang/lib/CodeGen/CGClass.cpp
@@ -2495,7 +2495,7 @@ void 
CodeGenFunction::EmitInlinedInheritingCXXConstructorCall(
   // FIXME: This is dumb, we should ask the ABI not to try to set the return
   // value instead.
   if (!RetType->isVoidType())
-    ReturnValue = CreateIRTemp(RetType, "retval.inhctor");
+    ReturnValue = CreateIRTempWithoutCast(RetType, "retval.inhctor");
 
   CGM.getCXXABI().EmitInstanceFunctionProlog(*this);
   CXXThisValue = CXXABIThisValue;
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 339314ecff9cd..a2eb4d5930829 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -181,9 +181,10 @@ RawAddress 
CodeGenFunction::CreateDefaultAlignTempAlloca(llvm::Type *Ty,
   return CreateTempAlloca(Ty, Align, Name);
 }
 
-RawAddress CodeGenFunction::CreateIRTemp(QualType Ty, const Twine &Name) {
+RawAddress CodeGenFunction::CreateIRTempWithoutCast(QualType Ty,
+                                                    const Twine &Name) {
   CharUnits Align = getContext().getTypeAlignInChars(Ty);
-  return CreateTempAlloca(ConvertType(Ty), Align, Name);
+  return CreateTempAllocaWithoutCast(ConvertType(Ty), Align, Name, nullptr);
 }
 
 RawAddress CodeGenFunction::CreateMemTemp(QualType Ty, const Twine &Name,
@@ -6156,7 +6157,7 @@ CodeGenFunction::EmitHLSLOutArgLValues(const 
HLSLOutArgExpr *E, QualType Ty) {
   OpaqueValueMappingData::bind(*this, E->getOpaqueArgLValue(), BaseLV);
 
   QualType ExprTy = E->getType();
-  Address OutTemp = CreateIRTemp(ExprTy);
+  Address OutTemp = CreateIRTempWithoutCast(ExprTy);
   LValue TempLV = MakeAddrLValue(OutTemp, ExprTy);
 
   if (E->isInOut())
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp 
b/clang/lib/CodeGen/CGExprScalar.cpp
index 1b1bc4a11741e..9548b25507274 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2459,8 +2459,8 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction 
&CGF, LValue SrcVal,
     assert(LoadList.size() >= VecTy->getNumElements() &&
            "Flattened type on RHS must have the same number or more elements "
            "than vector on LHS.");
-    llvm::Value *V =
-        CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
+    llvm::Value *V = CGF.Builder.CreateLoad(
+        CGF.CreateIRTempWithoutCast(DestTy, "flatcast.tmp"));
     // write to V.
     for (unsigned I = 0, E = VecTy->getNumElements(); I < E; I++) {
       RValue RVal = CGF.EmitLoadOfLValue(LoadList[I], Loc);
@@ -2478,8 +2478,8 @@ static Value *EmitHLSLElementwiseCast(CodeGenFunction 
&CGF, LValue SrcVal,
            "Flattened type on RHS must have the same number or more elements "
            "than vector on LHS.");
 
-    llvm::Value *V =
-        CGF.Builder.CreateLoad(CGF.CreateIRTemp(DestTy, "flatcast.tmp"));
+    llvm::Value *V = CGF.Builder.CreateLoad(
+        CGF.CreateIRTempWithoutCast(DestTy, "flatcast.tmp"));
     // V is an allocated temporary to build the truncated matrix into.
     for (unsigned I = 0, E = MatTy->getNumElementsFlattened(); I < E; I++) {
       unsigned ColMajorIndex =
diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp 
b/clang/lib/CodeGen/CodeGenFunction.cpp
index 61128316963ac..d8ee39b347697 100644
--- a/clang/lib/CodeGen/CodeGenFunction.cpp
+++ b/clang/lib/CodeGen/CodeGenFunction.cpp
@@ -1244,7 +1244,7 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, 
QualType RetTy,
     ReturnValue = Address(Addr, ConvertType(RetTy),
                           CGM.getNaturalTypeAlignment(RetTy), KnownNonNull);
   } else {
-    ReturnValue = CreateIRTemp(RetTy, "retval");
+    ReturnValue = CreateIRTempWithoutCast(RetTy, "retval");
 
     // Tell the epilog emitter to autorelease the result.  We do this
     // now so that various specialized functions can suppress it
diff --git a/clang/lib/CodeGen/CodeGenFunction.h 
b/clang/lib/CodeGen/CodeGenFunction.h
index 226950ab599e3..044b9834b1a92 100644
--- a/clang/lib/CodeGen/CodeGenFunction.h
+++ b/clang/lib/CodeGen/CodeGenFunction.h
@@ -2904,15 +2904,15 @@ class CodeGenFunction : public CodeGenTypeCache {
   RawAddress CreateDefaultAlignTempAlloca(llvm::Type *Ty,
                                           const Twine &Name = "tmp");
 
-  /// CreateIRTemp - Create a temporary IR object of the given type, with
-  /// appropriate alignment. This routine should only be used when an temporary
-  /// value needs to be stored into an alloca (for example, to avoid explicit
-  /// PHI construction), but the type is the IR type, not the type appropriate
-  /// for storing in memory.
+  /// CreateIRTempWithoutCast - Create a temporary IR object of the given type,
+  /// with appropriate alignment. This routine should only be used when a
+  /// temporary value needs to be stored into an alloca (for example, to avoid
+  /// explicit PHI construction), but the type is the IR type, not the type
+  /// appropriate for storing in memory.
   ///
   /// That is, this is exactly equivalent to CreateMemTemp, but calling
   /// ConvertType instead of ConvertTypeForMem.
-  RawAddress CreateIRTemp(QualType T, const Twine &Name = "tmp");
+  RawAddress CreateIRTempWithoutCast(QualType T, const Twine &Name = "tmp");
 
   /// CreateMemTemp - Create a temporary memory object of the given type, with
   /// appropriate alignmen and cast it to the default address space. Returns
diff --git a/clang/lib/CodeGen/CodeGenPGO.cpp b/clang/lib/CodeGen/CodeGenPGO.cpp
index 4921eba7934a2..59faa3aef2460 100644
--- a/clang/lib/CodeGen/CodeGenPGO.cpp
+++ b/clang/lib/CodeGen/CodeGenPGO.cpp
@@ -1505,7 +1505,7 @@ void CodeGenFunction::maybeCreateMCDCCondBitmap() {
     // Note: This doesn't initialize Addrs in invalidated Decisions.
     for (auto *MCDCCondBitmapAddr : PGO->getMCDCCondBitmapAddrArray(Builder))
       *MCDCCondBitmapAddr =
-          CreateIRTemp(getContext().UnsignedIntTy, "mcdc.addr");
+          CreateIRTempWithoutCast(getContext().UnsignedIntTy, "mcdc.addr");
   }
 }
 bool CodeGenFunction::isMCDCDecisionExpr(const Expr *E) const {
diff --git a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c 
b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
index d74470304c69e..1b77ead54d2b2 100644
--- a/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
+++ b/clang/test/CodeGen/AMDGPU/amdgpu-atomic-float.c
@@ -5,16 +5,12 @@
 // SAFE-LABEL: define dso_local float @test_float_post_inc(
 // SAFE-SAME: ) #[[ATTR0:[0-9]+]] {
 // SAFE-NEXT:  [[ENTRY:.*:]]
-// SAFE-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// SAFE-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // SAFE-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 
4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory 
[[META3]]
 // SAFE-NEXT:    ret float [[TMP0]]
 //
 // UNSAFE-LABEL: define dso_local float @test_float_post_inc(
 // UNSAFE-SAME: ) #[[ATTR0:[0-9]+]] {
 // UNSAFE-NEXT:  [[ENTRY:.*:]]
-// UNSAFE-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// UNSAFE-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // UNSAFE-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test_float_post_inc.n to ptr), float 1.000000e+00 seq_cst, align 
4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory 
[[META3]], !amdgpu.ignore.denormal.mode [[META3]]
 // UNSAFE-NEXT:    ret float [[TMP0]]
 //
@@ -27,8 +23,6 @@ float test_float_post_inc()
 // CHECK-LABEL: define dso_local float @test_float_post_dc(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr 
addrspace(1) @test_float_post_dc.n to ptr), float 1.000000e+00 seq_cst, align 
4, !amdgpu.no.fine.grained.memory [[META3:![0-9]+]], !amdgpu.no.remote.memory 
[[META3]]
 // CHECK-NEXT:    ret float [[TMP0]]
 //
@@ -41,8 +35,6 @@ float test_float_post_dc()
 // CHECK-LABEL: define dso_local float @test_float_pre_dc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr 
addrspace(1) @test_float_pre_dc.n to ptr), float 1.000000e+00 seq_cst, align 4, 
!amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = fsub float [[TMP0]], 1.000000e+00
 // CHECK-NEXT:    ret float [[TMP1]]
@@ -56,8 +48,6 @@ float test_float_pre_dc()
 // SAFE-LABEL: define dso_local float @test_float_pre_inc(
 // SAFE-SAME: ) #[[ATTR0]] {
 // SAFE-NEXT:  [[ENTRY:.*:]]
-// SAFE-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// SAFE-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // SAFE-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 
4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // SAFE-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
 // SAFE-NEXT:    ret float [[TMP1]]
@@ -65,8 +55,6 @@ float test_float_pre_dc()
 // UNSAFE-LABEL: define dso_local float @test_float_pre_inc(
 // UNSAFE-SAME: ) #[[ATTR0]] {
 // UNSAFE-NEXT:  [[ENTRY:.*:]]
-// UNSAFE-NEXT:    [[RETVAL:%.*]] = alloca float, align 4, addrspace(5)
-// UNSAFE-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // UNSAFE-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test_float_pre_inc.n to ptr), float 1.000000e+00 seq_cst, align 
4, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory 
[[META3]], !amdgpu.ignore.denormal.mode [[META3]]
 // UNSAFE-NEXT:    [[TMP1:%.*]] = fadd float [[TMP0]], 1.000000e+00
 // UNSAFE-NEXT:    ret float [[TMP1]]
@@ -80,8 +68,6 @@ float test_float_pre_inc()
 // CHECK-LABEL: define dso_local double @test_double_post_inc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test_double_post_inc.n to ptr), double 1.000000e+00 seq_cst, 
align 8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory 
[[META3]]
 // CHECK-NEXT:    ret double [[TMP0]]
 //
@@ -94,8 +80,6 @@ double test_double_post_inc()
 // CHECK-LABEL: define dso_local double @test_double_post_dc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr 
addrspace(1) @test_double_post_dc.n to ptr), double 1.000000e+00 seq_cst, align 
8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    ret double [[TMP0]]
 //
@@ -108,8 +92,6 @@ double test_double_post_dc()
 // CHECK-LABEL: define dso_local double @test_double_pre_dc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr 
addrspace(1) @test_double_pre_dc.n to ptr), double 1.000000e+00 seq_cst, align 
8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = fsub double [[TMP0]], 1.000000e+00
 // CHECK-NEXT:    ret double [[TMP1]]
@@ -123,8 +105,6 @@ double test_double_pre_dc()
 // CHECK-LABEL: define dso_local double @test_double_pre_inc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca double, align 8, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test_double_pre_inc.n to ptr), double 1.000000e+00 seq_cst, align 
8, !amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = fadd double [[TMP0]], 1.000000e+00
 // CHECK-NEXT:    ret double [[TMP1]]
@@ -138,8 +118,6 @@ double test_double_pre_inc()
 // CHECK-LABEL: define dso_local half @test__Float16_post_inc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test__Float16_post_inc.n to ptr), half 0xH3C00 seq_cst, align 2, 
!amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    ret half [[TMP0]]
 //
@@ -152,8 +130,6 @@ _Float16 test__Float16_post_inc()
 // CHECK-LABEL: define dso_local half @test__Float16_post_dc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr 
addrspace(1) @test__Float16_post_dc.n to ptr), half 0xH3C00 seq_cst, align 2, 
!amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    ret half [[TMP0]]
 //
@@ -166,8 +142,6 @@ _Float16 test__Float16_post_dc()
 // CHECK-LABEL: define dso_local half @test__Float16_pre_dc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fsub ptr addrspacecast (ptr 
addrspace(1) @test__Float16_pre_dc.n to ptr), half 0xH3C00 seq_cst, align 2, 
!amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = fsub half [[TMP0]], 0xH3C00
 // CHECK-NEXT:    ret half [[TMP1]]
@@ -181,8 +155,6 @@ _Float16 test__Float16_pre_dc()
 // CHECK-LABEL: define dso_local half @test__Float16_pre_inc(
 // CHECK-SAME: ) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca half, align 2, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw fadd ptr addrspacecast (ptr 
addrspace(1) @test__Float16_pre_inc.n to ptr), half 0xH3C00 seq_cst, align 2, 
!amdgpu.no.fine.grained.memory [[META3]], !amdgpu.no.remote.memory [[META3]]
 // CHECK-NEXT:    [[TMP1:%.*]] = fadd half [[TMP0]], 0xH3C00
 // CHECK-NEXT:    ret half [[TMP1]]
diff --git a/clang/test/CodeGen/AMDGPU/full-bf16.c 
b/clang/test/CodeGen/AMDGPU/full-bf16.c
index d2ec34561cd8e..3ce1027b95805 100644
--- a/clang/test/CodeGen/AMDGPU/full-bf16.c
+++ b/clang/test/CodeGen/AMDGPU/full-bf16.c
@@ -10,10 +10,8 @@
 // CHECK-LABEL: define dso_local bfloat @div(
 // CHECK-SAME: bfloat noundef [[A:%.*]], bfloat noundef [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca bfloat, align 2, addrspace(5)
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca bfloat, align 2, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[A_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[A_ADDR]] to ptr
 // CHECK-NEXT:    [[B_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[B_ADDR]] to ptr
 // CHECK-NEXT:    store bfloat [[A]], ptr [[A_ADDR_ASCAST]], align 2
diff --git a/clang/test/CodeGen/amdgpu-abi-version.c 
b/clang/test/CodeGen/amdgpu-abi-version.c
index b9c1de0521b95..cc6223da76554 100644
--- a/clang/test/CodeGen/amdgpu-abi-version.c
+++ b/clang/test/CodeGen/amdgpu-abi-version.c
@@ -7,8 +7,6 @@
 // CHECK-LABEL: define dso_local i32 @foo(
 // CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr addrspace(4) 
@__oclc_ABI_version, align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = icmp sge i32 [[TMP0]], 500
 // CHECK-NEXT:    [[TMP2:%.*]] = call align 8 dereferenceable(256) ptr 
addrspace(4) @llvm.amdgcn.implicitarg.ptr()
diff --git a/clang/test/CodeGen/builtins-extended-image.c 
b/clang/test/CodeGen/builtins-extended-image.c
index 0dbf81dabd77b..491bbcf7d5412 100644
--- a/clang/test/CodeGen/builtins-extended-image.c
+++ b/clang/test/CodeGen/builtins-extended-image.c
@@ -8,13 +8,11 @@ typedef _Float16 half4 __attribute__((ext_vector_type(4)));
 // CHECK-LABEL: define dso_local <4 x float> 
@test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(
 // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], 
i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) 
#[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, 
addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, 
addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr
@@ -41,13 +39,11 @@ float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_r(float4 
v4f32, float f32, int
 // CHECK-LABEL: define dso_local <4 x float> 
@test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(
 // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], 
i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, 
addrspace(5)
 // CHECK-NEXT:    [[F32_ADDR:%.*]] = alloca float, align 4, addrspace(5)
 // CHECK-NEXT:    [[I32_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
 // CHECK-NEXT:    [[TEX_ADDR:%.*]] = alloca ptr, align 32, addrspace(5)
 // CHECK-NEXT:    [[VEC4I32_ADDR:%.*]] = alloca <4 x i32>, align 16, 
addrspace(5)
-// CHECK-NEXT:    [[RETVAL_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[RETVAL]] to ptr
 // CHECK-NEXT:    [[V4F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[V4F32_ADDR]] to ptr
 // CHECK-NEXT:    [[F32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[F32_ADDR]] to ptr
 // CHECK-NEXT:    [[I32_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) 
[[I32_ADDR]] to ptr
@@ -74,13 +70,11 @@ float4 test_amdgcn_image_gather4_lz_2d_v4f32_f32_g(float4 
v4f32, float f32, int
 // CHECK-LABEL: define dso_local <4 x float> 
@test_amdgcn_image_gather4_lz_2d_v4f32_f32_b(
 // CHECK-SAME: <4 x float> noundef [[V4F32:%.*]], float noundef [[F32:%.*]], 
i32 noundef [[I32:%.*]], ptr [[TEX:%.*]], <4 x i32> noundef [[VEC4I32:%.*]]) 
#[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca <4 x float>, align 16, addrspace(5)
 // CHECK-NEXT:    [[V4F32_ADDR:%.*]] = alloca <4 x float>, align 16, 
addrspace(5)
 // C...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/179327
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to