[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-02-15 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From ead2aca22a0f964d0f316e5ea7a5e3967844c015 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

This patch leverages the fact that padding bytes have an undefined value.
This changes the previous behavior, in which padding bytes were set to zero
in case of partial initialization.

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 98 +++
 .../test/CodeGenCXX/trivial-auto-var-init.cpp | 34 ++-
 .../test/CodeGenOpenCL/partial_initializer.cl |  3 +-
 3 files changed, 114 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..6f7674c6b701eb 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,89 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static uint64_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+llvm::Constant *Init, Address Loc,
+bool isVolatile,
+CGBuilderTy &Builder,
+bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa(Init) &&
  "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa(Init) || isa(Init) ||
   isa(Init) || isa(Init) ||
   isa(Init)) {
 auto *I = Builder.CreateStore(Init, Loc, isVolatile);
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
-return;
+return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
   dyn_cast(Init)) {
+bool CountNonNullBytes = true;
+uint64_t LeadingNonNullElementsCount = 0;
 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
   // If necessary, get a pointer to the element and emit it.
-  if (!Elt->isNullValue() && !isa(Elt))
+  if (!isNullOrUndef(Elt)) {
 emitStoresForInitAfterBZero(
 CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), 
isVolatile,
 Builder, IsAutoInit);
+if (CountNonNullBytes)
+  ++LeadingNonNullElementsCount;
+  } else if (CountNonNullBytes)
+CountNonNullBytes = false;
 }
-return;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+return LeadingNonNullElementsCount * ElementByteCount;
   }
 
   assert((isa(Init) || isa(Init)) &&
  "Unknown value type!");
 
+  bool CountNonNullBytes = true;
+  uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+
   for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
-llvm::Constant *Elt = cast(Init->getOperand(i));
+llvm::Constant *Operand = cast(Init->getOperand(i));
+uint64_t OperandByteCount = DL.getTypeAllocSize(Operand->getType());
+
+uint64_t OperandOffset;
+if (isNullOrUndef(Operand)) {
+  OperandOffset = 0;
+} else {
+  // If necessary, get a pointer to the element and emit it.
+  OperandOffset = emitStoresForInitAfterBZero(
+  CGM, Operand, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
+  isVolatile, Builder, IsAutoInit);
+}
 
-// If necessary, get a pointer to the element and emit it.
-if (!Elt->isNullValue() && !isa(Elt))
-  emitStoresForInitAfterBZero(CGM, Elt,
-  Builder.CreateConstInBoundsGEP2_32(Loc, 0, 
i),
-  isVolatile, Builder, IsAutoInit);
+if (CountNonNullBytes) {
+  if (OperandOffset != OperandByteCount) {
+CountNonNullBytes = false;
+
+// Add the offset of current field.
+if (auto *CS = dyn_cast(Init)) {
+  llvm::StructType *CST = CS->getType();
+  const llvm::StructLa

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-02-15 Thread via cfe-commits

serge-sans-paille wrote:

> It's not clear to me this is actually consistently profitable if the computed 
> offset is small. If we have to emit a memset starting at a weird offset, the 
> code might get worse overall. (e.g. on x86, a memset of 32 bytes is three 
> instructions; a memset of 31 bytes is roughly 6.) Not sure how much we should 
> rely on the optimizer to clean up cases like that.
> 
> We probably need to mess with the way emitStoresForInitAfterBZero handles 
> padding... see #78034 . (Not directly caused by your patch, but it closely 
> interacts with the same code.)

I've updated the patch with a few bug fixes. One of its noticeable effects 
is that it relies on the fact that padding bytes have an undefined value. 
In particular, for partial initialization, current clang sets the padding to 
zero, whereas this patch leaves it undefined.

With the current version, I can recompile LLVM with the modified clang and have 
it pass its validation, so it looks rather good.

I haven't sorted out the question of odd memset numbers.

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-25 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From e2231f4de6e835618cf30b0bab6b3cc72839dc94 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 60 ++-
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |  6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |  3 +-
 3 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244f..e5a2561502afa99 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static size_t
+CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) {
+  // Zero and Undef never requires any extra stores.
+  if (Init->isNullValue() || isa(Init) ||
+  isa(Init) || isa(Init))
+return 0u;
+  if (isa(Init) || isa(Init) ||
+  isa(Init) || isa(Init) ||
+  isa(Init))
+return DL.getTypeAllocSize(Init->getType());
+
+  // See if we can emit each element.
+  if (isa(Init) || isa(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+  llvm::Constant *Elt = cast(Init->getOperand(i));
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+  dyn_cast(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+  llvm::Constant *Elt = CDS->getElementAsConstant(i);
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+// llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize <<
+// "\n";
+
+llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+Address AdjustedLoc =
+LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty),
+  CharUnits::fromQuantity(LeadingNonNullBytes))
+: Loc;
+auto *I = Builder.CreateMemSet(
+AdjustedLoc, Z8,
+llvm::ConstantInt::get(CGM.IntPtrTy,
+   ConstantSize - LeadingNonNullBytes),
+isVolatile);
+
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
 
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp 
b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f84..b0deb8149ed936f 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
 
 // UNINIT-LABEL:  test_huge_small_init(
 // ZERO-LABEL:test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // ZERO-NOT: !annotation
 // ZERO: store i8 97,
 // ZERO: store i8 98,
 // ZERO: store i8 99,
 // ZERO: store i8 100,
 // PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // PATTERN-NOT: !annotation
 // PATTERN: store i8 97,
 // PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl 
b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a1..7c01c750d1afef2 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/p

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-25 Thread via cfe-commits

github-actions[bot] wrote:




:warning: C/C++ code formatter, clang-format found issues in your code. 
:warning:



You can test this locally with the following command:


```bash
git-clang-format --diff e99edf6bcb20169e153110426f840a2dfeeec66d 
e2231f4de6e835618cf30b0bab6b3cc72839dc94 -- clang/lib/CodeGen/CGDecl.cpp 
clang/test/CodeGenCXX/trivial-auto-var-init.cpp
```





View the diff from clang-format here.


```diff
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index e5a2561502..bc03909c1c 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,8 +905,8 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
-static size_t
-CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) {
+static size_t CountLeadingNonNullBytes(const llvm::DataLayout &DL,
+   llvm::Constant *Init) {
   // Zero and Undef never requires any extra stores.
   if (Init->isNullValue() || isa(Init) ||
   isa(Init) || isa(Init))

```




https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-26 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From 1ca9d2d7697528b1a126cc95a2fb7a9e5bb8669a Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 109 +-
 clang/test/CodeGen/array-init.c   |  50 
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |   6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |   3 +-
 4 files changed, 163 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244f..724dcf6464aef10 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,98 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
+static size_t CountLeadingNonNullBytes(const llvm::DataLayout &DL,
+   llvm::Constant *Init) {
+  // Zero and Undef never requires any extra stores.
+  if (isNullOrUndef(Init))
+return 0u;
+
+  if (isa(Init) || isa(Init) ||
+  isa(Init) || isa(Init) ||
+  isa(Init))
+return DL.getTypeAllocSize(Init->getType());
+
+  // For array, consider each element independently
+  if (auto *CA = dyn_cast(Init)) {
+
+llvm::ArrayType *CAT = CA->getType();
+uint64_t NumElements = CAT->getNumElements();
+uint64_t ElementByteCount = DL.getTypeAllocSize(CAT->getElementType());
+
+unsigned LeadingNonNullElementsCount = 0;
+for (; LeadingNonNullElementsCount != NumElements;
+ ++LeadingNonNullElementsCount) {
+  auto *Elt =
+  cast(Init->getOperand(LeadingNonNullElementsCount));
+  if (isNullOrUndef(Elt))
+break;
+}
+
+unsigned TrailingNonNullBytes = 0;
+if (LeadingNonNullElementsCount != 0) {
+  LeadingNonNullElementsCount -= 1;
+  TrailingNonNullBytes = CountLeadingNonNullBytes(
+  DL,
+  cast(Init->getOperand(LeadingNonNullElementsCount)));
+}
+
+return LeadingNonNullElementsCount * ElementByteCount +
+   TrailingNonNullBytes;
+;
+  }
+
+  // For records, per field
+  if (auto *CS = dyn_cast(Init)) {
+llvm::StructType *CST = CS->getType();
+const llvm::StructLayout *SL = DL.getStructLayout(CST);
+
+uint64_t NumElements = CST->getNumElements();
+uint64_t LeadingNonNullFieldsCount = 0;
+
+for (; LeadingNonNullFieldsCount != NumElements;
+ ++LeadingNonNullFieldsCount) {
+  auto *Elt =
+  cast(Init->getOperand(LeadingNonNullFieldsCount));
+  if (isNullOrUndef(Elt))
+break;
+}
+
+unsigned TrailingNonNullBytes = 0;
+if (LeadingNonNullFieldsCount != 0) {
+  LeadingNonNullFieldsCount -= 1;
+  TrailingNonNullBytes = CountLeadingNonNullBytes(
+  DL,
+  cast(Init->getOperand(LeadingNonNullFieldsCount)));
+}
+
+return SL->getElementOffset(LeadingNonNullFieldsCount) +
+   TrailingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+  dyn_cast(Init)) {
+size_t LeadingNonNullElementCount = 0;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+for (unsigned NumElements = CDS->getNumElements();
+ LeadingNonNullElementCount != NumElements;
+ ++LeadingNonNullElementCount) {
+  llvm::Constant *Elt =
+  CDS->getElementAsConstant(LeadingNonNullElementCount);
+  if (isNullOrUndef(Elt))
+break;
+}
+return LeadingNonNullElementCount * ElementByteCount;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1301,21 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+
+llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+Address AdjustedLoc =
+LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty),
+  CharUnits::fromQuantity(LeadingNonNullBytes))
+: Loc;
+auto *I 

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-26 Thread via cfe-commits

serge-sans-paille wrote:

> (Extra tests needed)

test added, ready for review

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-29 Thread via cfe-commits

serge-sans-paille wrote:

@nikic / @AaronBallman any thoughts on the general approach?

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-29 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From 376f62b2aa3e1c2ec37b53a66f7525ae02e54172 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 109 +-
 clang/test/CodeGen/array-init.c   |  50 
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |   6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |   3 +-
 4 files changed, 163 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244..724dcf6464aef1 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,98 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
+static size_t CountLeadingNonNullBytes(const llvm::DataLayout &DL,
+   llvm::Constant *Init) {
+  // Zero and Undef never requires any extra stores.
+  if (isNullOrUndef(Init))
+return 0u;
+
+  if (isa(Init) || isa(Init) ||
+  isa(Init) || isa(Init) ||
+  isa(Init))
+return DL.getTypeAllocSize(Init->getType());
+
+  // For array, consider each element independently
+  if (auto *CA = dyn_cast(Init)) {
+
+llvm::ArrayType *CAT = CA->getType();
+uint64_t NumElements = CAT->getNumElements();
+uint64_t ElementByteCount = DL.getTypeAllocSize(CAT->getElementType());
+
+unsigned LeadingNonNullElementsCount = 0;
+for (; LeadingNonNullElementsCount != NumElements;
+ ++LeadingNonNullElementsCount) {
+  auto *Elt =
+  cast(Init->getOperand(LeadingNonNullElementsCount));
+  if (isNullOrUndef(Elt))
+break;
+}
+
+unsigned TrailingNonNullBytes = 0;
+if (LeadingNonNullElementsCount != 0) {
+  LeadingNonNullElementsCount -= 1;
+  TrailingNonNullBytes = CountLeadingNonNullBytes(
+  DL,
+  cast(Init->getOperand(LeadingNonNullElementsCount)));
+}
+
+return LeadingNonNullElementsCount * ElementByteCount +
+   TrailingNonNullBytes;
+;
+  }
+
+  // For records, per field
+  if (auto *CS = dyn_cast(Init)) {
+llvm::StructType *CST = CS->getType();
+const llvm::StructLayout *SL = DL.getStructLayout(CST);
+
+uint64_t NumElements = CST->getNumElements();
+uint64_t LeadingNonNullFieldsCount = 0;
+
+for (; LeadingNonNullFieldsCount != NumElements;
+ ++LeadingNonNullFieldsCount) {
+  auto *Elt =
+  cast(Init->getOperand(LeadingNonNullFieldsCount));
+  if (isNullOrUndef(Elt))
+break;
+}
+
+unsigned TrailingNonNullBytes = 0;
+if (LeadingNonNullFieldsCount != 0) {
+  LeadingNonNullFieldsCount -= 1;
+  TrailingNonNullBytes = CountLeadingNonNullBytes(
+  DL,
+  cast(Init->getOperand(LeadingNonNullFieldsCount)));
+}
+
+return SL->getElementOffset(LeadingNonNullFieldsCount) +
+   TrailingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+  dyn_cast(Init)) {
+size_t LeadingNonNullElementCount = 0;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+for (unsigned NumElements = CDS->getNumElements();
+ LeadingNonNullElementCount != NumElements;
+ ++LeadingNonNullElementCount) {
+  llvm::Constant *Elt =
+  CDS->getElementAsConstant(LeadingNonNullElementCount);
+  if (isNullOrUndef(Elt))
+break;
+}
+return LeadingNonNullElementCount * ElementByteCount;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1301,21 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+
+llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+Address AdjustedLoc =
+LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty),
+  CharUnits::fromQuantity(LeadingNonNullBytes))
+: Loc;
+auto *I = 

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-30 Thread Eli Friedman via cfe-commits


@@ -1209,8 +1301,21 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);

efriedma-quic wrote:

I don't like the way this duplicates the logic.  You aren't really trying to 
count bytes that are "non-null"; you're trying to find the offset of the first 
byte that isn't overwritten by emitStoresForInitAfterBZero.  The best place to 
compute that is emitStoresForInitAfterBZero itself.

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread via cfe-commits


@@ -1209,8 +1301,21 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);

serge-sans-paille wrote:

I agree, what about the updated version then?

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From 5ecd085ebf94d8382290cf30ceec704102b7a0af Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 81 +++
 clang/test/CodeGen/array-init.c   | 50 
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |  6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |  3 +-
 4 files changed, 121 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c1724..e243c10a2b86a 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,77 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static size_t
+emitStoresForInitAfterBZero(CodeGenModule &CGM, llvm::Constant *Init,
+Address Loc, bool isVolatile, CGBuilderTy &Builder,
+bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa(Init) &&
  "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa(Init) || isa(Init) ||
   isa(Init) || isa(Init) ||
   isa(Init)) {
 auto *I = Builder.CreateStore(Init, Loc, isVolatile);
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
-return;
+return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
   dyn_cast(Init)) {
+bool CountNonNullBytes = true;
+size_t LeadingNonNullElementsCount = 0;
 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
   // If necessary, get a pointer to the element and emit it.
-  if (!Elt->isNullValue() && !isa(Elt))
+  if (!isNullOrUndef(Elt)) {
 emitStoresForInitAfterBZero(
 CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), 
isVolatile,
 Builder, IsAutoInit);
+LeadingNonNullElementsCount += CountNonNullBytes;
+  } else if (CountNonNullBytes)
+CountNonNullBytes = false;
 }
-return;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+return LeadingNonNullElementsCount * ElementByteCount;
   }
 
   assert((isa(Init) || isa(Init)) &&
  "Unknown value type!");
 
+  bool CountNonNullBytes = true;
+  uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+  uint64_t PrevOperandOffset = 0;
   for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
 llvm::Constant *Elt = cast(Init->getOperand(i));
 
 // If necessary, get a pointer to the element and emit it.
-if (!Elt->isNullValue() && !isa(Elt))
-  emitStoresForInitAfterBZero(CGM, Elt,
-  Builder.CreateConstInBoundsGEP2_32(Loc, 0, 
i),
-  isVolatile, Builder, IsAutoInit);
+if (!isNullOrUndef(Elt)) {
+  PrevOperandOffset = emitStoresForInitAfterBZero(
+  CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile,
+  Builder, IsAutoInit);
+} else if (CountNonNullBytes) {
+  CountNonNullBytes = false;
+  uint64_t OperandOffset = 0;
+  if (i) {
+if (auto *CS = dyn_cast(Init)) {
+  llvm::StructType *CST = CS->getType();
+  const llvm::StructLayout *SL = DL.getStructLayout(CST);
+  OperandOffset = SL->getElementOffset(i - 1);
+} else if (auto *CA = dyn_cast(Init)) {
+  llvm::ArrayType *CAT = CA->getType();
+  uint64_t ElementByteCount =
+  DL.getTypeAllocSize(CAT->getElementType());
+  OperandOffset = ElementByteCount * (i - 1);
+}
+  }
+  Offset = PrevOperandOffset + OperandOffset;
+}
   }
+  return Offset;
 }
 
 /// Decide whether we should use bzero plus some stores to initialize a local
@@ -1205,12 +1239,13 @@ 

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From 2e1955b49402cae24835fd5346f073f039acc975 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 81 +++
 clang/test/CodeGen/array-init.c   | 50 
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |  6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |  3 +-
 4 files changed, 121 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c1724..17d5a354a27bb 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,77 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static size_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+  llvm::Constant *Init, Address Loc,
+  bool isVolatile, CGBuilderTy 
&Builder,
+  bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa(Init) &&
  "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa(Init) || isa(Init) ||
   isa(Init) || isa(Init) ||
   isa(Init)) {
 auto *I = Builder.CreateStore(Init, Loc, isVolatile);
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
-return;
+return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
   dyn_cast(Init)) {
+bool CountNonNullBytes = true;
+size_t LeadingNonNullElementsCount = 0;
 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
   // If necessary, get a pointer to the element and emit it.
-  if (!Elt->isNullValue() && !isa(Elt))
+  if (!isNullOrUndef(Elt)) {
 emitStoresForInitAfterBZero(
 CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), 
isVolatile,
 Builder, IsAutoInit);
+LeadingNonNullElementsCount += CountNonNullBytes;
+  } else if (CountNonNullBytes)
+CountNonNullBytes = false;
 }
-return;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+return LeadingNonNullElementsCount * ElementByteCount;
   }
 
   assert((isa(Init) || isa(Init)) &&
  "Unknown value type!");
 
+  bool CountNonNullBytes = true;
+  uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+  uint64_t PrevOperandOffset = 0;
   for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
 llvm::Constant *Elt = cast(Init->getOperand(i));
 
 // If necessary, get a pointer to the element and emit it.
-if (!Elt->isNullValue() && !isa(Elt))
-  emitStoresForInitAfterBZero(CGM, Elt,
-  Builder.CreateConstInBoundsGEP2_32(Loc, 0, 
i),
-  isVolatile, Builder, IsAutoInit);
+if (!isNullOrUndef(Elt)) {
+  PrevOperandOffset = emitStoresForInitAfterBZero(
+  CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), isVolatile,
+  Builder, IsAutoInit);
+} else if (CountNonNullBytes) {
+  CountNonNullBytes = false;
+  uint64_t OperandOffset = 0;
+  if (i) {
+if (auto *CS = dyn_cast(Init)) {
+  llvm::StructType *CST = CS->getType();
+  const llvm::StructLayout *SL = DL.getStructLayout(CST);
+  OperandOffset = SL->getElementOffset(i - 1);
+} else if (auto *CA = dyn_cast(Init)) {
+  llvm::ArrayType *CAT = CA->getType();
+  uint64_t ElementByteCount =
+  DL.getTypeAllocSize(CAT->getElementType());
+  OperandOffset = ElementByteCount * (i - 1);
+}
+  }
+  Offset = PrevOperandOffset + OperandOffset;
+}
   }
+  return Offset;
 }
 
 /// Decide whether we should us

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic commented:

It's not clear to me this is actually consistently profitable if the computed 
offset is small.  If we have to emit a memset starting at a weird offset, the 
code might get worse overall.  (e.g. on x86, a memset of 32 bytes is three 
instructions; a memset of 31 bytes is roughly 6.)  Not sure how much we should 
rely on the optimizer to clean up cases like that.

We probably need to mess with the way emitStoresForInitAfterBZero handles 
padding... see #78034 .  (Not directly caused by your patch, but it closely 
interacts with the same code.)

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread Eli Friedman via cfe-commits


@@ -945,48 +950,77 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static size_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+  llvm::Constant *Init, Address Loc,
+  bool isVolatile, CGBuilderTy 
&Builder,
+  bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa(Init) &&
  "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa(Init) || isa(Init) ||
   isa(Init) || isa(Init) ||
   isa(Init)) {
 auto *I = Builder.CreateStore(Init, Loc, isVolatile);
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
-return;
+return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
   dyn_cast(Init)) {
+bool CountNonNullBytes = true;
+size_t LeadingNonNullElementsCount = 0;
 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
   // If necessary, get a pointer to the element and emit it.
-  if (!Elt->isNullValue() && !isa(Elt))
+  if (!isNullOrUndef(Elt)) {
 emitStoresForInitAfterBZero(
 CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), 
isVolatile,
 Builder, IsAutoInit);
+LeadingNonNullElementsCount += CountNonNullBytes;

efriedma-quic wrote:

This is too clever; just write `if (CountNonNullBytes) 
++LeadingNonNullElementsCount;`

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread Eli Friedman via cfe-commits


@@ -945,48 +950,77 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static size_t emitStoresForInitAfterBZero(CodeGenModule &CGM,

efriedma-quic wrote:

Use uint64_t for struct/array offsets.

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-31 Thread Eli Friedman via cfe-commits

https://github.com/efriedma-quic edited 
https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-02-01 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From 707050175b73e61e3be8da7c2b4683be8afa6db7 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 101 ++
 clang/test/CodeGen/array-init.c   |  50 +
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |   6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |   3 +-
 4 files changed, 139 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c1724..a9ae06bd77c32 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,90 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static uint64_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+llvm::Constant *Init, Address Loc,
+bool isVolatile,
+CGBuilderTy &Builder,
+bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa(Init) &&
  "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa(Init) || isa(Init) ||
   isa(Init) || isa(Init) ||
   isa(Init)) {
 auto *I = Builder.CreateStore(Init, Loc, isVolatile);
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
-return;
+return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
   dyn_cast(Init)) {
+bool CountNonNullBytes = true;
+uint64_t LeadingNonNullElementsCount = 0;
 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
   // If necessary, get a pointer to the element and emit it.
-  if (!Elt->isNullValue() && !isa(Elt))
+  if (!isNullOrUndef(Elt)) {
 emitStoresForInitAfterBZero(
 CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), 
isVolatile,
 Builder, IsAutoInit);
+if (CountNonNullBytes)
+  ++LeadingNonNullElementsCount;
+  } else if (CountNonNullBytes)
+CountNonNullBytes = false;
 }
-return;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+return LeadingNonNullElementsCount * ElementByteCount;
   }
 
   assert((isa(Init) || isa(Init)) &&
  "Unknown value type!");
 
+  bool CountNonNullBytes = true;
+  uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+
   for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
-llvm::Constant *Elt = cast(Init->getOperand(i));
+llvm::Constant *Operand = cast(Init->getOperand(i));
+uint64_t OperandByteCount = DL.getTypeAllocSize(Operand->getType());
 
-// If necessary, get a pointer to the element and emit it.
-if (!Elt->isNullValue() && !isa(Elt))
-  emitStoresForInitAfterBZero(CGM, Elt,
-  Builder.CreateConstInBoundsGEP2_32(Loc, 0, 
i),
-  isVolatile, Builder, IsAutoInit);
+uint64_t OperandOffset;
+if (isNullOrUndef(Operand)) {
+  OperandOffset = 0;
+} else {
+  // If necessary, get a pointer to the element and emit it.
+  OperandOffset = emitStoresForInitAfterBZero(
+  CGM, Operand, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
+  isVolatile, Builder, IsAutoInit);
+}
+
+if (CountNonNullBytes) {
+  if (OperandOffset != OperandByteCount) {
+CountNonNullBytes = false;
+
+// When not at the beginning of the constant, add the offset of the
+// previous elements.
+if (i) {
+  if (auto *CS = dyn_cast(Init)) {
+llvm::StructType *CST = CS->getType();
+const llvm::StructLayout *SL = DL.getStructLayout(CST);
+   

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-02-01 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From 1010379cd5e871fc404e58dbce348e953ebaf75c Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 101 ++
 .../test/CodeGenOpenCL/partial_initializer.cl |   3 +-
 2 files changed, 85 insertions(+), 19 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c1724..a9ae06bd77c32 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,11 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+static bool isNullOrUndef(llvm::Constant *C) {
+  return C->isNullValue() || isa(C) ||
+ isa(C) || isa(C);
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -945,48 +950,90 @@ static bool 
canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
 
 /// For inits that canEmitInitWithFewStoresAfterBZero returned true for, emit
 /// the scalar stores that would be required.
-static void emitStoresForInitAfterBZero(CodeGenModule &CGM,
-llvm::Constant *Init, Address Loc,
-bool isVolatile, CGBuilderTy &Builder,
-bool IsAutoInit) {
+static uint64_t emitStoresForInitAfterBZero(CodeGenModule &CGM,
+llvm::Constant *Init, Address Loc,
+bool isVolatile,
+CGBuilderTy &Builder,
+bool IsAutoInit) {
   assert(!Init->isNullValue() && !isa(Init) &&
  "called emitStoresForInitAfterBZero for zero or undef value.");
 
+  auto const &DL = CGM.getDataLayout();
+
   if (isa(Init) || isa(Init) ||
   isa(Init) || isa(Init) ||
   isa(Init)) {
 auto *I = Builder.CreateStore(Init, Loc, isVolatile);
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
-return;
+return DL.getTypeAllocSize(Init->getType());
   }
 
   if (llvm::ConstantDataSequential *CDS =
   dyn_cast(Init)) {
+bool CountNonNullBytes = true;
+uint64_t LeadingNonNullElementsCount = 0;
 for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
   llvm::Constant *Elt = CDS->getElementAsConstant(i);
 
   // If necessary, get a pointer to the element and emit it.
-  if (!Elt->isNullValue() && !isa(Elt))
+  if (!isNullOrUndef(Elt)) {
 emitStoresForInitAfterBZero(
 CGM, Elt, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i), 
isVolatile,
 Builder, IsAutoInit);
+if (CountNonNullBytes)
+  ++LeadingNonNullElementsCount;
+  } else if (CountNonNullBytes)
+CountNonNullBytes = false;
 }
-return;
+uint64_t ElementByteCount = DL.getTypeAllocSize(CDS->getElementType());
+return LeadingNonNullElementsCount * ElementByteCount;
   }
 
   assert((isa(Init) || isa(Init)) &&
  "Unknown value type!");
 
+  bool CountNonNullBytes = true;
+  uint64_t Offset = DL.getTypeAllocSize(Init->getType());
+
   for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
-llvm::Constant *Elt = cast(Init->getOperand(i));
+llvm::Constant *Operand = cast(Init->getOperand(i));
+uint64_t OperandByteCount = DL.getTypeAllocSize(Operand->getType());
 
-// If necessary, get a pointer to the element and emit it.
-if (!Elt->isNullValue() && !isa(Elt))
-  emitStoresForInitAfterBZero(CGM, Elt,
-  Builder.CreateConstInBoundsGEP2_32(Loc, 0, 
i),
-  isVolatile, Builder, IsAutoInit);
+uint64_t OperandOffset;
+if (isNullOrUndef(Operand)) {
+  OperandOffset = 0;
+} else {
+  // If necessary, get a pointer to the element and emit it.
+  OperandOffset = emitStoresForInitAfterBZero(
+  CGM, Operand, Builder.CreateConstInBoundsGEP2_32(Loc, 0, i),
+  isVolatile, Builder, IsAutoInit);
+}
+
+if (CountNonNullBytes) {
+  if (OperandOffset != OperandByteCount) {
+CountNonNullBytes = false;
+
+// When not at the beginning of the constant, add the offset of the
+// previous elements.
+if (i) {
+  if (auto *CS = dyn_cast(Init)) {
+llvm::StructType *CST = CS->getType();
+const llvm::StructLayout *SL = DL.getStructLayout(CST);
+OperandOffset += SL->getElementOffset(i - 1);
+  } else if (auto *CA = dyn_cast(Init)) {
+l

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-25 Thread via cfe-commits

https://github.com/serge-sans-paille created 
https://github.com/llvm/llvm-project/pull/79502

…initialized array

Fix #79500

>From 9aa54f3f4ecfcf2dad80ec3a57a3d41027356e55 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 60 ++-
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |  6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |  3 +-
 3 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index a5da0aa2965a000..101fefc511a0c40 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+__attribute__((optnone)) static size_t
+CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) {
+  // Zero and Undef never requires any extra stores.
+  if (Init->isNullValue() || isa(Init) ||
+  isa(Init) || isa(Init))
+return 0u;
+  if (isa(Init) || isa(Init) ||
+  isa(Init) || isa(Init) ||
+  isa(Init))
+return DL.getTypeAllocSize(Init->getType());
+
+  // See if we can emit each element.
+  if (isa(Init) || isa(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+  llvm::Constant *Elt = cast(Init->getOperand(i));
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+  dyn_cast(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+  llvm::Constant *Elt = CDS->getElementAsConstant(i);
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+// llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize <<
+// "\n";
+
+llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+Address AdjustedLoc =
+LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty),
+  CharUnits::fromQuantity(LeadingNonNullBytes))
+: Loc;
+auto *I = Builder.CreateMemSet(
+AdjustedLoc, Z8,
+llvm::ConstantInt::get(CGM.IntPtrTy,
+   ConstantSize - LeadingNonNullBytes),
+isVolatile);
+
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
 
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp 
b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f84..b0deb8149ed936f 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
 
 // UNINIT-LABEL:  test_huge_small_init(
 // ZERO-LABEL:test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // ZERO-NOT: !annotation
 // ZERO: store i8 97,
 // ZERO: store i8 98,
 // ZERO: store i8 99,
 // ZERO: store i8 100,
 // PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // PATTERN-NOT: !annotation
 // PATTERN: store i8 97,
 // PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl 
b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a1..7c01c750d1afef2 100644
--- a/clang/test/CodeGenOpenC

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-25 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang-codegen

Author: None (serge-sans-paille)


Changes

…initialized array

Fix #79500

---
Full diff: https://github.com/llvm/llvm-project/pull/79502.diff


3 Files Affected:

- (modified) clang/lib/CodeGen/CGDecl.cpp (+58-2) 
- (modified) clang/test/CodeGenCXX/trivial-auto-var-init.cpp (+4-2) 
- (modified) clang/test/CodeGenOpenCL/partial_initializer.cl (+2-1) 


``diff
diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index a5da0aa2965a00..101fefc511a0c4 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+__attribute__((optnone)) static size_t
+CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) {
+  // Zero and Undef never requires any extra stores.
+  if (Init->isNullValue() || isa(Init) ||
+  isa(Init) || isa(Init))
+return 0u;
+  if (isa(Init) || isa(Init) ||
+  isa(Init) || isa(Init) ||
+  isa(Init))
+return DL.getTypeAllocSize(Init->getType());
+
+  // See if we can emit each element.
+  if (isa(Init) || isa(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+  llvm::Constant *Elt = cast(Init->getOperand(i));
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+  dyn_cast(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+  llvm::Constant *Elt = CDS->getElementAsConstant(i);
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+// llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize <<
+// "\n";
+
+llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+Address AdjustedLoc =
+LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty),
+  CharUnits::fromQuantity(LeadingNonNullBytes))
+: Loc;
+auto *I = Builder.CreateMemSet(
+AdjustedLoc, Z8,
+llvm::ConstantInt::get(CGM.IntPtrTy,
+   ConstantSize - LeadingNonNullBytes),
+isVolatile);
+
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
 
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp 
b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f8..b0deb8149ed936 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
 
 // UNINIT-LABEL:  test_huge_small_init(
 // ZERO-LABEL:test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // ZERO-NOT: !annotation
 // ZERO: store i8 97,
 // ZERO: store i8 98,
 // ZERO: store i8 99,
 // ZERO: store i8 100,
 // PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // PATTERN-NOT: !annotation
 // PATTERN: store i8 97,
 // PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl 
b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a..7c01c750d1afef 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/clang/test/CodeGenOpenCL/partial_initializer.cl
@@ -35,7 +35,8 @@ void f(void) {
   // CHECK: %[[compoundliteral1:.*]] = alloca <2 x i32>, align 8
   // CHECK: %[[V2:

[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-25 Thread via cfe-commits

serge-sans-paille wrote:

(Extra tests needed)

https://github.com/llvm/llvm-project/pull/79502
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [clang] Only set the trailing bytes to zero when filling a partially … (PR #79502)

2024-01-25 Thread via cfe-commits

https://github.com/serge-sans-paille updated 
https://github.com/llvm/llvm-project/pull/79502

>From e9151018b7a086b167db394caadb74e20dc27711 Mon Sep 17 00:00:00 2001
From: serge-sans-paille 
Date: Thu, 25 Jan 2024 22:12:55 +0100
Subject: [PATCH] [clang] Only set the trailing bytes to zero when filling a
 partially initialized array

Fix #79500
---
 clang/lib/CodeGen/CGDecl.cpp  | 60 ++-
 .../test/CodeGenCXX/trivial-auto-var-init.cpp |  6 +-
 .../test/CodeGenOpenCL/partial_initializer.cl |  3 +-
 3 files changed, 64 insertions(+), 5 deletions(-)

diff --git a/clang/lib/CodeGen/CGDecl.cpp b/clang/lib/CodeGen/CGDecl.cpp
index bbe14ef4c17244f..9e7f2f15c6e0982 100644
--- a/clang/lib/CodeGen/CGDecl.cpp
+++ b/clang/lib/CodeGen/CGDecl.cpp
@@ -905,6 +905,47 @@ void CodeGenFunction::EmitScalarInit(const Expr *init, 
const ValueDecl *D,
   EmitStoreOfScalar(value, lvalue, /* isInitialization */ true);
 }
 
+__attribute__((optnone)) static size_t
+CountLeadingNonNullBytes(const llvm::DataLayout &DL, llvm::Constant *Init) {
+  // Zero and Undef never requires any extra stores.
+  if (Init->isNullValue() || isa(Init) ||
+  isa(Init) || isa(Init))
+return 0u;
+  if (isa(Init) || isa(Init) ||
+  isa(Init) || isa(Init) ||
+  isa(Init))
+return DL.getTypeAllocSize(Init->getType());
+
+  // See if we can emit each element.
+  if (isa(Init) || isa(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = Init->getNumOperands(); i != e; ++i) {
+  llvm::Constant *Elt = cast(Init->getOperand(i));
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  if (llvm::ConstantDataSequential *CDS =
+  dyn_cast(Init)) {
+size_t LeadingNonNullBytes = 0;
+for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
+  llvm::Constant *Elt = CDS->getElementAsConstant(i);
+  size_t ExtraBytes = CountLeadingNonNullBytes(DL, Elt);
+  if (!ExtraBytes)
+return LeadingNonNullBytes;
+  LeadingNonNullBytes += ExtraBytes;
+}
+return LeadingNonNullBytes;
+  }
+
+  // Anything else is hard and scary.
+  return 0;
+}
+
 /// Decide whether we can emit the non-zero parts of the specified initializer
 /// with equal or fewer than NumStores scalar stores.
 static bool canEmitInitWithFewStoresAfterBZero(llvm::Constant *Init,
@@ -1209,8 +1250,23 @@ static void emitStoresForConstant(CodeGenModule &CGM, 
const VarDecl &D,
   // If the initializer is all or mostly the same, codegen with bzero / memset
   // then do a few stores afterward.
   if (shouldUseBZeroPlusStoresToInitialize(constant, ConstantSize)) {
-auto *I = Builder.CreateMemSet(Loc, llvm::ConstantInt::get(CGM.Int8Ty, 0),
-   SizeVal, isVolatile);
+size_t LeadingNonNullBytes =
+CountLeadingNonNullBytes(CGM.getDataLayout(), constant);
+// llvm::errs() << LeadingNonNullBytes << " out of " << ConstantSize <<
+// "\n";
+
+llvm::Constant *Z8 = llvm::ConstantInt::get(CGM.Int8Ty, 0);
+Address AdjustedLoc =
+LeadingNonNullBytes ? Builder.CreateConstInBoundsByteGEP(
+  Loc.withElementType(CGM.Int8Ty),
+  CharUnits::fromQuantity(LeadingNonNullBytes))
+: Loc;
+auto *I = Builder.CreateMemSet(
+AdjustedLoc, Z8,
+llvm::ConstantInt::get(CGM.IntPtrTy,
+   ConstantSize - LeadingNonNullBytes),
+isVolatile);
+
 if (IsAutoInit)
   I->addAnnotationMetadata("auto-init");
 
diff --git a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp 
b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
index eed9868cad07f84..b0deb8149ed936f 100644
--- a/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
+++ b/clang/test/CodeGenCXX/trivial-auto-var-init.cpp
@@ -288,14 +288,16 @@ void test_huge_uninit() {
 
 // UNINIT-LABEL:  test_huge_small_init(
 // ZERO-LABEL:test_huge_small_init(
-// ZERO: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// ZERO: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// ZERO: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // ZERO-NOT: !annotation
 // ZERO: store i8 97,
 // ZERO: store i8 98,
 // ZERO: store i8 99,
 // ZERO: store i8 100,
 // PATTERN-LABEL: test_huge_small_init(
-// PATTERN: call void @llvm.memset{{.*}}, i8 0, i64 65536,
+// PATTERN: %[[v0:.*]] = getelementptr inbounds i8, ptr %{{.*}}, i64 4
+// PATTERN: call void @llvm.memset{{.*}}(ptr {{.*}} %[[v0]], i8 0, i64 65532,
 // PATTERN-NOT: !annotation
 // PATTERN: store i8 97,
 // PATTERN: store i8 98,
diff --git a/clang/test/CodeGenOpenCL/partial_initializer.cl 
b/clang/test/CodeGenOpenCL/partial_initializer.cl
index 5cc4e2b246003a1..7c01c750d1afef2 100644
--- a/clang/test/CodeGenOpenCL/partial_initializer.cl
+++ b/c