mattd created this revision.
mattd added a reviewer: majnemer.
This change avoids the overhead of storing, and later crawling,
an initializer list of all zeros for arrays. When LLVM
visits this (llvm/IR/Constants.cpp) ConstantArray::getImpl()
it will scan the list looking for an array of all zero.
We can avoid the store, and short-cut the scan, by detecting
all zeros when clang builds-up the initialization representation.
This was brought to my attention while investigating PR36030.
https://reviews.llvm.org/D42549
Files:
lib/CodeGen/CGExprConstant.cpp
test/CodeGen/array-init.c
Index: test/CodeGen/array-init.c
===================================================================
--- test/CodeGen/array-init.c
+++ test/CodeGen/array-init.c
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck %s
+
+// CHECK: @test.a1 = internal global [10 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
+// CHECK: @test.a2 = internal global [10 x i32] zeroinitializer, align 16
+// CHECK: @test.a3 = internal global [10 x i32] zeroinitializer, align 16
+// CHECK: @test.a4 = internal global [10 x i32] zeroinitializer, align 16
+// CHECK: @test.b1 = internal constant [10 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
+// CHECK: @test.b2 = internal constant [10 x i32] zeroinitializer, align 16
+// CHECK: @test.b3 = internal constant [10 x i32] zeroinitializer, align 16
+
+// CHECK: define void @test() #0 {
+// CHECK: %c1 = alloca [10 x i32], align 16
+// CHECK: %c2 = alloca [10 x i32], align 16
+// CHECK: %c3 = alloca [10 x i32], align 16
+// CHECK: %c4 = alloca [10 x i32], align 16
+// CHECK: %d1 = alloca [10 x i32], align 16
+// CHECK: %d2 = alloca [10 x i32], align 16
+// CHECK: %d3 = alloca [10 x i32], align 16
+// CHECK: %d4 = alloca [10 x i32], align 16
+// CHECK: %1 = bitcast [10 x i32]* %c1 to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 40, i1 false)
+// CHECK: %2 = bitcast i8* %1 to [10 x i32]*
+// CHECK: %3 = getelementptr [10 x i32], [10 x i32]* %2, i32 0, i32 1
+// CHECK: store i32 1, i32* %3
+// CHECK: %4 = getelementptr [10 x i32], [10 x i32]* %2, i32 0, i32 2
+// CHECK: store i32 2, i32* %4
+// CHECK: %5 = bitcast [10 x i32]* %c2 to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %5, i8 0, i64 40, i1 false)
+// CHECK: %6 = bitcast [10 x i32]* %c3 to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %6, i8 0, i64 40, i1 false)
+// CHECK: %7 = bitcast [10 x i32]* %d1 to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %7, i8 0, i64 40, i1 true)
+// CHECK: %8 = bitcast i8* %7 to [10 x i32]*
+// CHECK: %9 = getelementptr [10 x i32], [10 x i32]* %8, i32 0, i32 1
+// CHECK: store volatile i32 1, i32* %9
+// CHECK: %10 = getelementptr [10 x i32], [10 x i32]* %8, i32 0, i32 2
+// CHECK: store volatile i32 2, i32* %10
+// CHECK: %11 = bitcast [10 x i32]* %d2 to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %11, i8 0, i64 40, i1 true)
+// CHECK: %12 = bitcast [10 x i32]* %d3 to i8*
+// CHECK: call void @llvm.memset.p0i8.i64(i8* align 16 %12, i8 0, i64 40, i1 true)
+// CHECK: ret void
+// CHECK: }
+
+void test()
+{
+ static int a1[10] = {0,1,2};
+ static int a2[10] = {0,0,0};
+ static int a3[10] = {0};
+ static int a4[10];
+
+ const int b1[10] = {0,1,2};
+ const int b2[10] = {0,0,0};
+ const int b3[10] = {0};
+
+ int c1[10] = {0,1,2};
+ int c2[10] = {0,0,0};
+ int c3[10] = {0};
+ int c4[10];
+
+ volatile int d1[10] = {0,1,2};
+ volatile int d2[10] = {0,0,0};
+ volatile int d3[10] = {0};
+ volatile int d4[10];
+}
Index: lib/CodeGen/CGExprConstant.cpp
===================================================================
--- lib/CodeGen/CGExprConstant.cpp
+++ lib/CodeGen/CGExprConstant.cpp
@@ -859,9 +859,10 @@
// Copy initializer elements.
SmallVector<llvm::Constant*, 16> Elts;
- Elts.reserve(NumInitableElts + NumElements);
+ Elts.reserve(std::max(NumInitableElts, NumElements));
bool RewriteType = false;
+ bool AllNullValues = true;
for (unsigned i = 0; i < NumInitableElts; ++i) {
Expr *Init = ILE->getInit(i);
llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
@@ -869,8 +870,15 @@
return nullptr;
RewriteType |= (C->getType() != ElemTy);
Elts.push_back(C);
+ if (!C->isNullValue())
+ AllNullValues = false;
}
+ // If all initializer elements are "zero," then avoid storing NumElements
+ // instances of the zero representation.
+ if (AllNullValues)
+ return llvm::ConstantAggregateZero::get(AType);
+
RewriteType |= (fillC->getType() != ElemTy);
Elts.resize(NumElements, fillC);
@@ -877,7 +885,7 @@
if (RewriteType) {
// FIXME: Try to avoid packing the array
std::vector<llvm::Type*> Types;
- Types.reserve(NumInitableElts + NumElements);
+ Types.reserve(Elts.size());
for (unsigned i = 0, e = Elts.size(); i < e; ++i)
Types.push_back(Elts[i]->getType());
llvm::StructType *SType = llvm::StructType::get(AType->getContext(),
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits