mattd created this revision.
mattd added a reviewer: majnemer.

This change avoids the overhead of storing, and later crawling,
an all-zero initializer list for an array. When LLVM visits such
a list in ConstantArray::getImpl() (llvm/IR/Constants.cpp), it
scans the elements looking for an all-zero array.
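For reference, the scan being short-cut looks roughly like this
(a paraphrased sketch of the getImpl() logic, not the exact
upstream code):

  // If every element is the (uniqued) null constant, the whole array
  // collapses to a ConstantAggregateZero -- but only after walking the
  // element list that clang has already built and stored.
  Constant *ConstantArray::getImpl(ArrayType *Ty, ArrayRef<Constant *> V) {
    if (V.empty())
      return ConstantAggregateZero::get(Ty);

    Constant *C = V[0];
    if (C->isNullValue() &&
        std::all_of(V.begin(), V.end(),
                    [C](Constant *E) { return E == C; }))
      return ConstantAggregateZero::get(Ty);

    // ... otherwise build a ConstantDataArray / ConstantArray ...
  }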

We can avoid the store, and short-cut the scan, by detecting the
all-zero case while clang builds up the initializer representation.

This was brought to my attention while investigating PR36030.


https://reviews.llvm.org/D42549

Files:
  lib/CodeGen/CGExprConstant.cpp
  test/CodeGen/array-init.c

Index: test/CodeGen/array-init.c
===================================================================
--- test/CodeGen/array-init.c
+++ test/CodeGen/array-init.c
@@ -0,0 +1,65 @@
+// RUN: %clang_cc1 %s -O0 -triple x86_64-unknown-linux-gnu -emit-llvm -o - | FileCheck %s
+
+// CHECK: @test.a1 = internal global [10 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
+// CHECK: @test.a2 = internal global [10 x i32] zeroinitializer, align 16
+// CHECK: @test.a3 = internal global [10 x i32] zeroinitializer, align 16
+// CHECK: @test.a4 = internal global [10 x i32] zeroinitializer, align 16
+// CHECK: @test.b1 = internal constant [10 x i32] [i32 0, i32 1, i32 2, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
+// CHECK: @test.b2 = internal constant [10 x i32] zeroinitializer, align 16
+// CHECK: @test.b3 = internal constant [10 x i32] zeroinitializer, align 16
+
+// CHECK: define void @test() #0 {
+// CHECK:   %c1 = alloca [10 x i32], align 16
+// CHECK:   %c2 = alloca [10 x i32], align 16
+// CHECK:   %c3 = alloca [10 x i32], align 16
+// CHECK:   %c4 = alloca [10 x i32], align 16
+// CHECK:   %d1 = alloca [10 x i32], align 16
+// CHECK:   %d2 = alloca [10 x i32], align 16
+// CHECK:   %d3 = alloca [10 x i32], align 16
+// CHECK:   %d4 = alloca [10 x i32], align 16
+// CHECK:   %1 = bitcast [10 x i32]* %c1 to i8*
+// CHECK:   call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 40, i1 false)
+// CHECK:   %2 = bitcast i8* %1 to [10 x i32]*
+// CHECK:   %3 = getelementptr [10 x i32], [10 x i32]* %2, i32 0, i32 1
+// CHECK:   store i32 1, i32* %3
+// CHECK:   %4 = getelementptr [10 x i32], [10 x i32]* %2, i32 0, i32 2
+// CHECK:   store i32 2, i32* %4
+// CHECK:   %5 = bitcast [10 x i32]* %c2 to i8*
+// CHECK:   call void @llvm.memset.p0i8.i64(i8* align 16 %5, i8 0, i64 40, i1 false)
+// CHECK:   %6 = bitcast [10 x i32]* %c3 to i8*
+// CHECK:   call void @llvm.memset.p0i8.i64(i8* align 16 %6, i8 0, i64 40, i1 false)
+// CHECK:   %7 = bitcast [10 x i32]* %d1 to i8*
+// CHECK:   call void @llvm.memset.p0i8.i64(i8* align 16 %7, i8 0, i64 40, i1 true)
+// CHECK:   %8 = bitcast i8* %7 to [10 x i32]*
+// CHECK:   %9 = getelementptr [10 x i32], [10 x i32]* %8, i32 0, i32 1
+// CHECK:   store volatile i32 1, i32* %9
+// CHECK:   %10 = getelementptr [10 x i32], [10 x i32]* %8, i32 0, i32 2
+// CHECK:   store volatile i32 2, i32* %10
+// CHECK:   %11 = bitcast [10 x i32]* %d2 to i8*
+// CHECK:   call void @llvm.memset.p0i8.i64(i8* align 16 %11, i8 0, i64 40, i1 true)
+// CHECK:   %12 = bitcast [10 x i32]* %d3 to i8*
+// CHECK:   call void @llvm.memset.p0i8.i64(i8* align 16 %12, i8 0, i64 40, i1 true)
+// CHECK:   ret void
+// CHECK: }
+
+void test()
+{
+  static int a1[10] = {0,1,2};
+  static int a2[10] = {0,0,0};
+  static int a3[10] = {0};
+  static int a4[10];
+
+  const int b1[10] = {0,1,2};
+  const int b2[10] = {0,0,0};
+  const int b3[10] = {0};
+
+  int c1[10] = {0,1,2};
+  int c2[10] = {0,0,0};
+  int c3[10] = {0};
+  int c4[10];
+
+  volatile int d1[10] = {0,1,2};
+  volatile int d2[10] = {0,0,0};
+  volatile int d3[10] = {0};
+  volatile int d4[10];
+}
Index: lib/CodeGen/CGExprConstant.cpp
===================================================================
--- lib/CodeGen/CGExprConstant.cpp
+++ lib/CodeGen/CGExprConstant.cpp
@@ -859,9 +859,10 @@
 
     // Copy initializer elements.
     SmallVector<llvm::Constant*, 16> Elts;
-    Elts.reserve(NumInitableElts + NumElements);
+    Elts.reserve(std::max(NumInitableElts, NumElements));
 
     bool RewriteType = false;
+    bool AllNullValues = true;
     for (unsigned i = 0; i < NumInitableElts; ++i) {
       Expr *Init = ILE->getInit(i);
       llvm::Constant *C = Emitter.tryEmitPrivateForMemory(Init, EltType);
@@ -869,8 +870,15 @@
         return nullptr;
       RewriteType |= (C->getType() != ElemTy);
       Elts.push_back(C);
+      if (!C->isNullValue())
+        AllNullValues = false;
     }
 
+    // If all initializer elements are "zero," then avoid storing NumElements
+    // instances of the zero representation.
+    if (AllNullValues)
+      return llvm::ConstantAggregateZero::get(AType);
+
     RewriteType |= (fillC->getType() != ElemTy);
     Elts.resize(NumElements, fillC);
 
@@ -877,7 +885,7 @@
     if (RewriteType) {
       // FIXME: Try to avoid packing the array
       std::vector<llvm::Type*> Types;
-      Types.reserve(NumInitableElts + NumElements);
+      Types.reserve(Elts.size());
       for (unsigned i = 0, e = Elts.size(); i < e; ++i)
         Types.push_back(Elts[i]->getType());
       llvm::StructType *SType = llvm::StructType::get(AType->getContext(),