https://github.com/serge-sans-paille updated https://github.com/llvm/llvm-project/pull/79502
>From 1ca9d2d7697528b1a126cc95a2fb7a9e5bb8669a Mon Sep 17 00:00:00 2001 From: serge-sans-paille <sguel...@mozilla.com> Date: Thu, 25 Jan 2024 22:12:55 +0100 Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a partially initialized array Fix #79500 --- clang/lib/CodeGen/CGDecl.cpp | 109 +++++++++++++++++- clang/test/CodeGen/array-init.c | 50 ++++++++ .../test/CodeGenCXX/trivial-auto-var-init.cpp | 6 +- .../test/CodeGenOpenCL/partial_initializer.cl | 3 +- 4 files changed, 163 insertions(+), 5 deletions(-) diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp index bbe14ef4c17244f..724dcf6464aef10 100644 --- a/clang/lib/CodeGen/CGDecl.cpp +++ b/clang/lib/CodeGen/CGDecl.cpp @@ -905,6 +905,98 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, const ValueDecl *D, EmitStoreOfScalar(value, lvalue, /* isInitialization */ true); } +static bool isNullOrUndef(llvm::Constant *C) { + return C->isNullValue() || isa<llvm::ConstantAggregateZero>(C) || + isa<llvm::ConstantPointerNull>(C) || isa<llvm::UndefValue>(C); +} + +static size_t CountLeadingNonNullBytes(const llvm::DataLayout &DL, + llvm::Constant *Init) { + // Zero and Undef never requires any extra stores. + if (isNullOrUndef(Init)) + return 0u; + + if (isa<llvm::ConstantInt>(Init) || isa<llvm::ConstantFP>(Init) || + isa<llvm::ConstantVector>(Init) || isa<llvm::BlockAddress>(Init) || + isa<llvm::ConstantExpr>(Init)) + return DL.getTypeAllocSize(Init->getType()); + + // For array, consider each element independently + if (auto *CA = dyn_cast<llvm::ConstantArray>(Init)) { + + llvm::ArrayType *CAT = CA->getType(); + uint64_t NumElements = CAT->getNumElements(); + uint64_t ElementByteCount = DL.getTypeAllocSize(CAT->getElementType()); + + unsigned LeadingNonNullElementsCount = 0; + for (; LeadingNonNullElementsCount != NumElements; + ++LeadingNonNullElementsCount) { + auto *Elt = + cast<llvm::Constant>(Init->getOperand(LeadingNonNullElementsCount)); + if (isNullOrUndef(Elt)) + break; + } + + unsigned TrailingNonNullBytes = 0; + if (LeadingNonNullElementsCount != 0) { + LeadingNonNullElementsCount -= 1; + TrailingNonNullBytes = CountLeadingNonNullBytes( + DL, + cast<llvm::Constant>(Init->getOperand(LeadingNonNullElementsCount))); + } + + return LeadingNonNullElementsCount * ElementByteCount + + TrailingNonNullBytes; + ; + } + + // For records, per field + if (auto *CS = dyn_cast<llvm::ConstantStruct>(Init)) { + llvm::StructType *CST = CS->getType(); + const llvm::StructLayout *SL = DL.getStructLayout(CST); + + uint64_t NumElements = CST->getNumElements(); + uint64_t LeadingNonNullFieldsCount = 0; + + for (; LeadingNonNullFieldsCount != NumElements; + ++LeadingNonNullFieldsCount) { + auto *Elt = + cast<llvm::Constant>(Init->getOperand(LeadingNonNullFieldsCount)); + if (isNullOrUndef(Elt)) + break; + } + + unsigned TrailingNonNullBytes = 0; + if (LeadingNonNullFieldsCount != 0) { + LeadingNonNullFieldsCount -= 1; + TrailingNonNullBytes = CountLeadingNonNullBytes( + DL, + cast<llvm::Constant>(Init->getOperand(LeadingNonNullFieldsCount))); + } + + return SL->getElementOffset(LeadingNonNullFieldsCount) + + TrailingNonNullBytes; + } + + if (llvm::ConstantDataSequential *CDS = + dyn_cast<llvm::ConstantDataSequential>(Init)) { + size_t LeadingNonNullElementCount = 0; + uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType()); + for (unsigned NumElements = CDS->getNumElements(); + LeadingNonNullElementCount != NumElements; + ++LeadingNonNullElementCount) { + llvm::Constant *Elt = + CDS->getElementAsConstant(LeadingNonNullElementCount); + if (isNullOrUndef(Elt)) + break; + } + return LeadingNonNullElementCount * ElementByteCount; + } + + // Anything else is hard and scary. + return 0; +} + /// Decide whether we can emit the non-zero parts of the specified initializer /// with equal or fewer than NumStores scalar stores. static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init, @@ -1209,8 +1301,21 @@ static void emitStoresForConstant(CodeGenModule &CGM, const VarDecl &D, // If the initializer is all or mostly the same, codegen with bzero / memset // then do a few stores afterward. if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) { - auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0), - SizeVal, isVolatile); + size_t LeadingNonNullBytes = + CountLeadingNonNullBytes(CGM.getDataLayout(), constant); + + llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0); + Address AdjustedLoc = + LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP( + Loc.withElementType(CGM.Int8Ty), + CharUnits::fromQuantity(LeadingNonNullBytes)) + : Loc; + auto *I = Builder.CreateMemSet( + AdjustedLoc, Z8, + llvm::ConstantInt::get(CGM.IntPtrTy, + ConstantSize - LeadingNonNullBytes), + isVolatile); + if (IsAutoInit) I->addAnnotationMetadata("auto-init"); diff --git a/clang/test/CodeGen/array-init.c b/clang/test/CodeGen/array-init.c index 62e87edc2974197..d5c01e016660275 100644 --- a/clang/test/CodeGen/array-init.c +++ b/clang/test/CodeGen/array-init.c @@ -1,3 +1,4 @@ +// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck -check-prefix=CHECK-NO-MERGE-CONSTANTS %s // RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -fmerge-all-constants -emit-llvm -o - | FileCheck -check-prefix=CHECK-MERGE-CONSTANTS %s @@ -13,3 +14,52 @@ void testConstArrayInits(void) const int a2[5] = {0,0,0}; const int a3[5] = {0}; } + + +// CHECK-LABEL: @testConstLongArrayInits() +// CHECK: entry: +// CHECK-NEXT: %a1 = alloca [20 x i32], align 16 +// CHECK-NEXT: %a2 = alloca [20 x %struct.anon], align 16 +// CHECK-NEXT: %a3 = alloca [20 x %struct.anon.0], align 16 +// CHECK-NEXT: %a4 = alloca [20 x %struct.anon.1], align 16 +// +// CHECK-NEXT: %0 = getelementptr inbounds i8, ptr %a1, i64 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %0, i8 0, i64 72, i1 false) +// CHECK-NEXT: %1 = getelementptr inbounds <{ i32, i32, [18 x i32] }>, ptr %a1, i32 0, i32 0 +// CHECK-NEXT: store i32 1, ptr %1, align 16 +// CHECK-NEXT: %2 = getelementptr inbounds <{ i32, i32, [18 x i32] }>, ptr %a1, i32 0, i32 1 +// CHECK-NEXT: store i32 2, ptr %2, align 4 +// +// CHECK-NEXT: %3 = getelementptr inbounds i8, ptr %a2, i64 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %3, i8 0, i64 152, i1 false) +// CHECK-NEXT: %4 = getelementptr inbounds <{ %struct.anon, [19 x %struct.anon] }>, ptr %a2, i32 0, i32 0 +// CHECK-NEXT: %5 = getelementptr inbounds %struct.anon, ptr %4, i32 0, i32 0 +// CHECK-NEXT: store i8 1, ptr %5, align 16 +// CHECK-NEXT: %6 = getelementptr inbounds %struct.anon, ptr %4, i32 0, i32 1 +// CHECK-NEXT: store i32 2, ptr %6, align 4 +// +// CHECK-NEXT: %7 = getelementptr inbounds i8, ptr %a3, i64 1 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 1 %7, i8 0, i64 159, i1 false) +// CHECK-NEXT: %8 = getelementptr inbounds <{ %struct.anon.0, [19 x %struct.anon.0] }>, ptr %a3, i32 0, i32 0 +// CHECK-NEXT: %9 = getelementptr inbounds %struct.anon.0, ptr %8, i32 0, i32 0 +// CHECK-NEXT: store i8 1, ptr %9, align 16 +// +// CHECK-NEXT: %10 = getelementptr inbounds i8, ptr %a4, i64 8 +// CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 %10, i8 0, i64 392, i1 false) +// CHECK-NEXT: %11 = getelementptr inbounds <{ %struct.anon.1, [19 x %struct.anon.1] }>, ptr %a4, i32 0, i32 0 +// CHECK-NEXT: %12 = getelementptr inbounds %struct.anon.1, ptr %11, i32 0, i32 0 +// CHECK-NEXT: store i8 1, ptr %12, align 16 +// CHECK-NEXT: %13 = getelementptr inbounds %struct.anon.1, ptr %11, i32 0, i32 1 +// CHECK-NEXT: %14 = getelementptr inbounds [4 x i32], ptr %13, i32 0, i32 0 +// CHECK-NEXT: store i32 2, ptr %14, align 4 +// +// CHECK-NEXT: ret void +// } + +void testConstLongArrayInits(void) +{ + const int a1[20] = {1,2}; + const struct {char c; int i;} a2[20] = {{1,2}}; + const struct {char c; int i;} a3[20] = {{1}}; + const struct {char c; int i[4];} a4[20] = {{1,{2}}}; +} diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp index eed9868cad07f84..b0deb8149ed936f 100644 --- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp +++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp @@ -288,14 +288,16 @@ void test_huge_uninit() { // UNINIT-LABEL: test_huge_small_init( // ZERO-LABEL: test_huge_small_init( -// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536, +// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4 +// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532, // ZERO-NOT: !annotation // ZERO: store i8 97, // ZERO: store i8 98, // ZERO: store i8 99, // ZERO: store i8 100, // PATTERN-LABEL: test_huge_small_init( -// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536, +// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4 +// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532, // PATTERN-NOT: !annotation // PATTERN: store i8 97, // PATTERN: store i8 98, diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl b/clang/test/CodeGenOpenCL/partial_initializer.cl index 5cc4e2b246003a1..7c01c750d1afef2 100644 --- a/clang/test/CodeGenOpenCL/partial_initializer.cl +++ b/clang/test/CodeGenOpenCL/partial_initializer.cl @@ -35,7 +35,8 @@ void f(void) { // CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8 // CHECK: %[[V2:.*]] = alloca <4 x i32>, align 16 - // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %A, i8 0, i32 144, i1 false) + // CHECK: %[[v0:.*]] = getelementptr inbounds i8, ptr %A, i32 8 + // CHECK: call void @llvm.memset.p0.i32(ptr align 4 %[[v0]], i8 0, i32 136, i1 false) // CHECK: %[[v2:.*]] = getelementptr inbounds [6 x [6 x float]], ptr %A, i32 0, i32 0 // CHECK: %[[v3:.*]] = getelementptr inbounds [6 x float], ptr %[[v2]], i32 0, i32 0 // CHECK: store float 1.000000e+00, ptr %[[v3]], align 4 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits