https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/90849

>From 18c489682d38837d7b0abc7b9ecf829a9df4bd0c Mon Sep 17 00:00:00 2001
From: Marian Lukac <marian.lu...@arm.com>
Date: Thu, 2 May 2024 11:33:13 +0000
Subject: [PATCH 1/3] [Clang] Added lifetime markers for temp. allocated
 Aggregate types

---
 clang/lib/CodeGen/CGCall.cpp                  | 10 ++++
 clang/test/CodeGen/aarch64-byval-temp.c       | 46 +++++++++++++++++--
 ...-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c |  2 +
 ...cle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp |  2 +
 clang/test/CodeGen/nofpclass.c                | 22 +++++----
 5 files changed, 70 insertions(+), 12 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 1b4ca2a8b2fe8..8fdde5326b8ed 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5202,6 +5202,16 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
           Val = Builder.CreateFreeze(Val);
         IRCallArgs[FirstIRArg] = Val;
 
+        // Emit lifetime markers for the temporary alloca.
+        llvm::TypeSize ByvalTempElementSize =
+            CGM.getDataLayout().getTypeAllocSize(Addr.getElementType());
+        llvm::Value *LifetimeSize =
+            EmitLifetimeStart(ByvalTempElementSize, Addr.getPointer());
+
+        // Add cleanup code to emit the end lifetime marker after the call.
+        if (LifetimeSize) // In case we disabled lifetime markers.
+          CallLifetimeEndAfterCall.emplace_back(Addr, LifetimeSize);
+
         I->copyInto(*this, Addr);
       } else {
         // We want to avoid creating an unnecessary temporary+copy here;
diff --git a/clang/test/CodeGen/aarch64-byval-temp.c 
b/clang/test/CodeGen/aarch64-byval-temp.c
index e9e2586406e5c..765a298f83ed0 100644
--- a/clang/test/CodeGen/aarch64-byval-temp.c
+++ b/clang/test/CodeGen/aarch64-byval-temp.c
@@ -1,13 +1,15 @@
-// RUN: %clang_cc1 -emit-llvm -triple arm64-- -o - %s -O0 | FileCheck %s 
--check-prefix=CHECK-O0
-// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns -triple arm64-- -o - %s -O3 
| FileCheck %s --check-prefix=CHECK-O3
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1 -emit-llvm -triple arm64-- 
-fexperimental-max-bitint-width=1024  -o - %s -O0 | FileCheck %s 
--check-prefix=CHECK-O0
+// RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns 
-fexperimental-max-bitint-width=1024  -triple arm64-- -o - %s -O3 | FileCheck 
%s --check-prefix=CHECK-O3
 
 struct large {
     void* pointers[8];
 };
 
 void pass_large(struct large);
+void pass_large_BitInt(_BitInt(129));
 
-// For arm64, we don't use byval to pass structs but instead we create
+// For arm64, we don't use byval to pass structs and _BitInt(>128) type, but 
instead we create
 // temporary allocas.
 //
 // Make sure we generate the appropriate lifetime markers for the temporary
@@ -71,3 +73,41 @@ void example(void) {
 // Mark the end of the lifetime of `l`.
 // CHECK-O3-NEXT: call void @llvm.lifetime.end.p0(i64 64, ptr %l)
 // CHECK-O3-NEXT: ret void
+
+void example_BitInt(void) {
+    _BitInt(129) l = {0};
+    pass_large_BitInt(l);
+    pass_large_BitInt(l);
+}
+// CHECK-O0-LABEL: define dso_local void @example_BitInt(
+// CHECK-O0-NEXT:  entry:
+// CHECK-O0-NEXT:    [[L:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O0-NEXT:    store i129 0, ptr [[L]], align 16
+// CHECK-O0-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16
+// CHECK-O0-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16
+// CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef 
[[INDIRECT_ARG_TEMP]])
+// CHECK-O0-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16
+// CHECK-O0-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 16
+// CHECK-O0-NEXT:    call void @pass_large_BitInt(ptr noundef 
[[INDIRECT_ARG_TEMP1]])
+// CHECK-O0-NEXT:    ret void
+//
+// CHECK-O3-LABEL: define dso_local void @example_BitInt(
+// CHECK-O3-NEXT:  entry:
+// CHECK-O3-NEXT:    [[L:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    [[INDIRECT_ARG_TEMP1:%.*]] = alloca i129, align 16
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr [[L]]) 
+// CHECK-O3-NEXT:    store i129 0, ptr [[L]], align 16, !tbaa [[TBAA6:![0-9]+]]
+// CHECK-O3-NEXT:    [[TMP0:%.*]] = load i129, ptr [[L]], align 16, !tbaa 
[[TBAA6]]
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr 
[[INDIRECT_ARG_TEMP]]) 
+// CHECK-O3-NEXT:    store i129 [[TMP0]], ptr [[INDIRECT_ARG_TEMP]], align 16, 
!tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef 
[[INDIRECT_ARG_TEMP]])
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr 
[[INDIRECT_ARG_TEMP]]) 
+// CHECK-O3-NEXT:    [[TMP1:%.*]] = load i129, ptr [[L]], align 16, !tbaa 
[[TBAA6]]
+// CHECK-O3-NEXT:    call void @llvm.lifetime.start.p0(i64 32, ptr 
[[INDIRECT_ARG_TEMP1]]) 
+// CHECK-O3-NEXT:    store i129 [[TMP1]], ptr [[INDIRECT_ARG_TEMP1]], align 
16, !tbaa [[TBAA6]]
+// CHECK-O3-NEXT:    call void @pass_large_BitInt(ptr noundef 
[[INDIRECT_ARG_TEMP1]])
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr 
[[INDIRECT_ARG_TEMP1]]) 
+// CHECK-O3-NEXT:    call void @llvm.lifetime.end.p0(i64 32, ptr [[L]]) 
diff --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
index a4abe96cc08a6..55e1ed393d848 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.c
@@ -88,8 +88,10 @@ typedef svint8_t vec2 
__attribute__((arm_sve_vector_bits(N)));
 // CHECK-NEXT: entry:
 // CHECK-NEXT:   [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS,8)]] x i8>, 
align 16
 // CHECK-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS,8)]] x i8> 
@llvm.vector.extract.v[[#div(VBITS,8)]]i8.nxv16i8(<vscale x 16 x i8> 
[[X_COERCE:%.*]], i64 0)
+// CHECK-NEXT:   call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], ptr 
nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:   store <[[#div(VBITS,8)]] x i8> [[X]], ptr 
[[INDIRECT_ARG_TEMP]], align 16, [[TBAA6]]
 // CHECK-NEXT:   call void @f3(ptr noundef nonnull [[INDIRECT_ARG_TEMP]]) 
[[ATTR5:#.*]]
+// CHECK-NEXT:   call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull 
[[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECK-NEXT:   ret void
 
 // CHECK128-LABEL: declare void @f3(<16 x i8> noundef)
diff --git 
a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp 
b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
index 05587fd9e7fe2..30ea73b63bce1 100644
--- a/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
+++ b/clang/test/CodeGen/aarch64-sve-acle-__ARM_FEATURE_SVE_VECTOR_OPERATORS.cpp
@@ -73,8 +73,10 @@ typedef svint16_t vec2 
__attribute__((arm_sve_vector_bits(N)));
 // CHECK128-NEXT:   ret void
 // CHECKWIDE-NEXT:   [[INDIRECT_ARG_TEMP:%.*]] = alloca <[[#div(VBITS, 16)]] x 
i16>, align 16
 // CHECKWIDE-NEXT:   [[X:%.*]] = tail call <[[#div(VBITS, 16)]] x i16> 
@llvm.vector.extract.v[[#div(VBITS, 16)]]i16.nxv8i16(<vscale x 8 x i16> 
[[X_COERCE:%.*]], i64 0)
+// CHECKWIDE-NEXT:   call void @llvm.lifetime.start.p0(i64 [[SIZE:[0-9]+]], 
ptr nonnull [[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECKWIDE-NEXT:   store <[[#div(VBITS, 16)]] x i16> [[X]], ptr 
[[INDIRECT_ARG_TEMP]], align 16, [[TBAA6:!tbaa !.*]]
 // CHECKWIDE-NEXT:   call void @_Z1fDv[[#div(VBITS, 16)]]_s(ptr noundef 
nonnull [[INDIRECT_ARG_TEMP]]) [[ATTR5:#.*]]
+// CHECKWIDE-NEXT:   call void @llvm.lifetime.end.p0(i64 [[SIZE]], ptr nonnull 
[[INDIRECT_ARG_TEMP]]) #[[ATTR6:[0-9]+]]
 // CHECKWIDE-NEXT:   ret void
 void g(vec2 x) { f(x); } // OK
 #endif
diff --git a/clang/test/CodeGen/nofpclass.c b/clang/test/CodeGen/nofpclass.c
index dd90d02f7759b..fc4c64f9b921b 100644
--- a/clang/test/CodeGen/nofpclass.c
+++ b/clang/test/CodeGen/nofpclass.c
@@ -172,7 +172,7 @@ double2 defined_func_v2f64(double2 a, double2 b, double2 c) 
{
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) float 
@call_extern_func
 // CLFINITEONLY-SAME: (float noundef nofpclass(nan inf) [[A:%.*]], double 
noundef nofpclass(nan inf) [[B:%.*]], half noundef nofpclass(nan inf) 
[[C:%.*]]) local_unnamed_addr #[[ATTR3:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef 
nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) 
#[[ATTR10:[0-9]+]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
float @extern_func(float noundef nofpclass(nan inf) [[A]], double noundef 
nofpclass(nan inf) [[B]], half noundef nofpclass(nan inf) [[C]]) 
#[[ATTR11:[0-9]+]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -249,7 +249,7 @@ float call_extern_func(float a, double b, _Float16 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) double 
@call_extern_func_vec
 // CLFINITEONLY-SAME: (double noundef nofpclass(nan inf) [[A_COERCE:%.*]], <2 
x double> noundef nofpclass(nan inf) [[B:%.*]], i32 noundef [[C_COERCE:%.*]]) 
local_unnamed_addr #[[ATTR5:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x 
double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
double @extern_func_vec(double noundef nofpclass(nan inf) [[A_COERCE]], <2 x 
double> noundef nofpclass(nan inf) [[B]], i32 noundef [[C_COERCE]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret double [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -389,7 +389,7 @@ float2 call_extern_func_vec(float2 a, double2 b, half2 c) {
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <2 x float> 
@defined_complex_func
 // CLFINITEONLY-SAME: (<2 x float> noundef nofpclass(nan inf) 
[[A_COERCE:%.*]], double noundef nofpclass(nan inf) [[B_COERCE0:%.*]], double 
noundef nofpclass(nan inf) [[B_COERCE1:%.*]], <2 x half> noundef nofpclass(nan 
inf) [[C_COERCE:%.*]]) local_unnamed_addr #[[ATTR6:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
<2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) 
[[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef 
nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) 
[[C_COERCE]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
<2 x float> @extern_complex(<2 x float> noundef nofpclass(nan inf) 
[[A_COERCE]], double noundef nofpclass(nan inf) [[B_COERCE0]], double noundef 
nofpclass(nan inf) [[B_COERCE1]], <2 x half> noundef nofpclass(nan inf) 
[[C_COERCE]]) #[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <2 x float> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -927,12 +927,14 @@ _Complex _Float16 defined_complex_func_f16_ret(_Complex 
_Float16 c) {
 // CLFINITEONLY-NEXT:    [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8
 // CLFINITEONLY-NEXT:    [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr 
[[CF16]], i64 2
 // CLFINITEONLY-NEXT:    [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], 
align 2
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull 
[[INDIRECT_ARG_TEMP]]) #[[ATTR12:[0-9]+]]
 // CLFINITEONLY-NEXT:    [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr 
inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE0]], ptr 
[[INDIRECT_ARG_TEMP]], align 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE1]], ptr 
[[INDIRECT_ARG_TEMP_IMAGP]], align 8
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement 
<2 x half> poison, half [[CF16_REAL]], i64 0
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement 
<2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double 
noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], 
half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) 
[[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 
noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) 
[[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 
[[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) 
[[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
float (float, ...) @variadic(float noundef nofpclass(nan inf) [[F32]], double 
noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], 
half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) 
[[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 
noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) 
[[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 
[[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) 
[[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]]
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull 
[[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1178,12 +1180,14 @@ float call_variadic(float f32, double f64, _Float16 f16,
 // CLFINITEONLY-NEXT:    [[CF16_REAL:%.*]] = load half, ptr [[CF16]], align 8
 // CLFINITEONLY-NEXT:    [[CF16_IMAGP:%.*]] = getelementptr inbounds i8, ptr 
[[CF16]], i64 2
 // CLFINITEONLY-NEXT:    [[CF16_IMAG:%.*]] = load half, ptr [[CF16_IMAGP]], 
align 2
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.start.p0(i64 16, ptr nonnull 
[[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    [[INDIRECT_ARG_TEMP_IMAGP:%.*]] = getelementptr 
inbounds i8, ptr [[INDIRECT_ARG_TEMP]], i64 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE0]], ptr 
[[INDIRECT_ARG_TEMP]], align 8
 // CLFINITEONLY-NEXT:    store double [[CF64_COERCE1]], ptr 
[[INDIRECT_ARG_TEMP_IMAGP]], align 8
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_0_VEC_INSERT:%.*]] = insertelement 
<2 x half> poison, half [[CF16_REAL]], i64 0
 // CLFINITEONLY-NEXT:    [[COERCE5_SROA_0_2_VEC_INSERT:%.*]] = insertelement 
<2 x half> [[COERCE5_SROA_0_0_VEC_INSERT]], half [[CF16_IMAG]], i64 1
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
float (float, ...) [[FPTR]](float noundef nofpclass(nan inf) [[F32]], double 
noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], 
half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) 
[[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 
noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) 
[[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 
[[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) 
[[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
float (float, ...) [[FPTR]](float noundef nofpclass(nan inf) [[F32]], double 
noundef nofpclass(nan inf) [[CONV]], double noundef nofpclass(nan inf) [[F64]], 
half noundef nofpclass(nan inf) [[F16]], double noundef nofpclass(nan inf) 
[[V2F32_COERCE]], <2 x double> noundef nofpclass(nan inf) [[V2F64]], i32 
noundef [[V2F16_COERCE]], <2 x float> noundef nofpclass(nan inf) 
[[CF32_COERCE]], ptr noundef nonnull byval({ double, double }) align 8 
[[INDIRECT_ARG_TEMP]], <2 x half> noundef nofpclass(nan inf) 
[[COERCE5_SROA_0_2_VEC_INSERT]]) #[[ATTR11]]
+// CLFINITEONLY-NEXT:    call void @llvm.lifetime.end.p0(i64 16, ptr nonnull 
[[INDIRECT_ARG_TEMP]]) #[[ATTR12]]
 // CLFINITEONLY-NEXT:    ret float [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1364,9 +1368,9 @@ extern __m256d extern_m256d(__m256d, ...);
 //
 // CLFINITEONLY: Function Attrs: convergent norecurse nounwind
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <4 x double> 
@call_m256d
-// CLFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR8:[0-9]+]] {
+// CLFINITEONLY-SAME: (<4 x double> noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR9:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
<4 x double> (<4 x double>, ...) @extern_m256d(<4 x double> noundef 
nofpclass(nan inf) [[X]], <4 x double> noundef nofpclass(nan inf) [[X]]) 
#[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
<4 x double> (<4 x double>, ...) @extern_m256d(<4 x double> noundef 
nofpclass(nan inf) [[X]], <4 x double> noundef nofpclass(nan inf) [[X]]) 
#[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <4 x double> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone
@@ -1407,9 +1411,9 @@ __m256d call_m256d(__m256d x) {
 //
 // CLFINITEONLY: Function Attrs: convergent norecurse nounwind
 // CLFINITEONLY-LABEL: define dso_local nofpclass(nan inf) <25 x double> 
@call_matrix
-// CLFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR9:[0-9]+]] {
+// CLFINITEONLY-SAME: (<25 x double> noundef nofpclass(nan inf) [[X:%.*]]) 
local_unnamed_addr #[[ATTR10:[0-9]+]] {
 // CLFINITEONLY-NEXT:  entry:
-// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
<25 x double> @extern_matrix(<25 x double> noundef nofpclass(nan inf) [[X]]) 
#[[ATTR10]]
+// CLFINITEONLY-NEXT:    [[CALL:%.*]] = tail call nnan ninf nofpclass(nan inf) 
<25 x double> @extern_matrix(<25 x double> noundef nofpclass(nan inf) [[X]]) 
#[[ATTR11]]
 // CLFINITEONLY-NEXT:    ret <25 x double> [[CALL]]
 //
 // NONANS: Function Attrs: noinline nounwind optnone

>From c9dad42ae9017405e5dbd323596eac4be59e4498 Mon Sep 17 00:00:00 2001
From: Marian Lukac <marian.lu...@arm.com>
Date: Wed, 15 May 2024 11:14:38 +0000
Subject: [PATCH 2/3] Restructured the whole case statement

---
 clang/lib/CodeGen/CGCall.cpp | 69 +++++++++++++-----------------------
 1 file changed, 25 insertions(+), 44 deletions(-)

diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 8fdde5326b8ed..19d62a74cfbc4 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -5192,28 +5192,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
     case ABIArgInfo::Indirect:
     case ABIArgInfo::IndirectAliased: {
       assert(NumIRArgs == 1);
-      if (!I->isAggregate()) {
-        // Make a temporary alloca to pass the argument.
-        RawAddress Addr = CreateMemTempWithoutCast(
-            I->Ty, ArgInfo.getIndirectAlign(), "indirect-arg-temp");
-
-        llvm::Value *Val = getAsNaturalPointerTo(Addr, I->Ty);
-        if (ArgHasMaybeUndefAttr)
-          Val = Builder.CreateFreeze(Val);
-        IRCallArgs[FirstIRArg] = Val;
-
-        // Emit lifetime markers for the temporary alloca.
-        llvm::TypeSize ByvalTempElementSize =
-            CGM.getDataLayout().getTypeAllocSize(Addr.getElementType());
-        llvm::Value *LifetimeSize =
-            EmitLifetimeStart(ByvalTempElementSize, Addr.getPointer());
-
-        // Add cleanup code to emit the end lifetime marker after the call.
-        if (LifetimeSize) // In case we disabled lifetime markers.
-          CallLifetimeEndAfterCall.emplace_back(Addr, LifetimeSize);
-
-        I->copyInto(*this, Addr);
-      } else {
+      if (I->isAggregate()) {
         // We want to avoid creating an unnecessary temporary+copy here;
         // however, we need one in three cases:
         // 1. If the argument is not byval, and we are required to copy the
@@ -5266,28 +5245,7 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
           }
         }
 
-        if (NeedCopy) {
-          // Create an aligned temporary, and copy to it.
-          RawAddress AI = CreateMemTempWithoutCast(
-              I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
-          llvm::Value *Val = getAsNaturalPointerTo(AI, I->Ty);
-          if (ArgHasMaybeUndefAttr)
-            Val = Builder.CreateFreeze(Val);
-          IRCallArgs[FirstIRArg] = Val;
-
-          // Emit lifetime markers for the temporary alloca.
-          llvm::TypeSize ByvalTempElementSize =
-              CGM.getDataLayout().getTypeAllocSize(AI.getElementType());
-          llvm::Value *LifetimeSize =
-              EmitLifetimeStart(ByvalTempElementSize, AI.getPointer());
-
-          // Add cleanup code to emit the end lifetime marker after the call.
-          if (LifetimeSize) // In case we disabled lifetime markers.
-            CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize);
-
-          // Generate the copy.
-          I->copyInto(*this, AI);
-        } else {
+        if (!NeedCopy) {
           // Skip the extra memcpy call.
           llvm::Value *V = getAsNaturalPointerTo(Addr, I->Ty);
           auto *T = llvm::PointerType::get(
@@ -5299,8 +5257,31 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo 
&CallInfo,
           if (ArgHasMaybeUndefAttr)
             Val = Builder.CreateFreeze(Val);
           IRCallArgs[FirstIRArg] = Val;
+          break;
         }
       }
+
+      // For non-aggregate args and aggregate args meeting conditions above
+      // we need to create an aligned temporary, and copy to it.
+      RawAddress AI = CreateMemTempWithoutCast(
+          I->Ty, ArgInfo.getIndirectAlign(), "byval-temp");
+      llvm::Value *Val = getAsNaturalPointerTo(AI, I->Ty);
+      if (ArgHasMaybeUndefAttr)
+        Val = Builder.CreateFreeze(Val);
+      IRCallArgs[FirstIRArg] = Val;
+
+      // Emit lifetime markers for the temporary alloca.
+      llvm::TypeSize ByvalTempElementSize =
+          CGM.getDataLayout().getTypeAllocSize(AI.getElementType());
+      llvm::Value *LifetimeSize =
+          EmitLifetimeStart(ByvalTempElementSize, AI.getPointer());
+
+      // Add cleanup code to emit the end lifetime marker after the call.
+      if (LifetimeSize) // In case we disabled lifetime markers.
+        CallLifetimeEndAfterCall.emplace_back(AI, LifetimeSize);
+
+      // Generate the copy.
+      I->copyInto(*this, AI);
       break;
     }
 

>From 7322f1f64dcbe296d87388ed2e39c2195752b5b6 Mon Sep 17 00:00:00 2001
From: Marian Lukac <marian.lu...@arm.com>
Date: Mon, 20 May 2024 11:23:30 +0000
Subject: [PATCH 3/3] removed autogenerated note

---
 clang/test/CodeGen/aarch64-byval-temp.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/clang/test/CodeGen/aarch64-byval-temp.c 
b/clang/test/CodeGen/aarch64-byval-temp.c
index 765a298f83ed0..0384830c69a41 100644
--- a/clang/test/CodeGen/aarch64-byval-temp.c
+++ b/clang/test/CodeGen/aarch64-byval-temp.c
@@ -1,4 +1,3 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
 // RUN: %clang_cc1 -emit-llvm -triple arm64-- 
-fexperimental-max-bitint-width=1024  -o - %s -O0 | FileCheck %s 
--check-prefix=CHECK-O0
 // RUN: %clang_cc1 -emit-llvm -disable-llvm-optzns 
-fexperimental-max-bitint-width=1024  -triple arm64-- -o - %s -O3 | FileCheck 
%s --check-prefix=CHECK-O3
 

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to