Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Stephen Hines via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL281686: Do not warn about format strings that are indexed 
string literals. (authored by srhines).

Changed prior to commit:
  https://reviews.llvm.org/D24584?vs=71585&id=71586#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D24584

Files:
  cfe/trunk/lib/Sema/SemaChecking.cpp
  cfe/trunk/test/Sema/format-strings.c

Index: cfe/trunk/lib/Sema/SemaChecking.cpp
===
--- cfe/trunk/lib/Sema/SemaChecking.cpp
+++ cfe/trunk/lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager &SM, const LangOptions &Features,
+  const TargetInfo &Target, unsigned *StartToken = nullptr,
+  unsigned *StartTokenByteOffset = nullptr) const {
+return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target,
+StartToken, StartTokenByteOffset);
+  }
+
+  SourceLocation getLocStart() const LLVM_READONLY {
+return FExpr->getLocStart().getLocWithOffset(Offset);
+  }
+  SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); }
+};
+}  // end anonymous namespace
+
+static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr,
   const Expr *OrigFormatExpr,
   ArrayRef Args,
   bool HasVAListArg, unsigned format_idx,
@@ -3871,8 +3959,11 @@
   unsigned firstDataArg, Sema::FormatStringType Type,
   Sema::VariadicCallType CallType, bool InFunctionCall,
   llvm::SmallBitVector &CheckedVarArgs,
-  UncoveredArgHandler &UncoveredArg) {
+  UncoveredArgHandler &UncoveredArg,
+  llvm::APSInt Offset) {
  tryAgain:
+  assert(Offset.isSigned() && "invalid offset");
+
   if (E->isTypeDependent() || E->isValueDependent())
 return SLCT_NotALiteral;
 
@@ -3906,23 +3997,28 @@
 CheckLeft = false;
 }
 
+// We need to maintain the offsets for the right and the left hand side
+// separately to check if every possible indexed expression is a valid
+// string literal. They might h

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71585.
meikeb added a comment.

.


https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager &SM, const LangOptions &Features,
+

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71584.
meikeb added a comment.

Rebase


https://reviews.llvm.org/D24584

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGBlocks.h
  lib/Sema/SemaChecking.cpp
  test/CodeGenObjCXX/lambda-expressions.mm
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: test/CodeGenObjCXX/lambda-expressions.mm
===
--- test/CodeGenObjCXX/lambda-expressions.mm
+++ test/CodeGenObjCXX/lambda-expressions.mm
@@ -4,6 +4,8 @@
 typedef int (^fp)();
 fp f() { auto x = []{ return 3; }; return x; }
 
+// ARC: %[[LAMBDACLASS:.*]] = type { i32 }
+
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [5 x i8] c"copy\00"
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [12 x i8] c"autorelease\00"
 // MRC-LABEL: define i32 ()* @_Z1fv(
@@ -60,6 +62,40 @@
 }
 @end
 
+// ARC: define void @_ZN13LambdaCapture4foo1ERi(i32* dereferenceable(4) %{{.*}})
+// ARC:   %[[CAPTURE0:.*]] = getelementptr inbounds %[[LAMBDACLASS]], %[[LAMBDACLASS]]* %{{.*}}, i32 0, i32 0
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE0]]
+
+// ARC: define internal void @"_ZZN13LambdaCapture4foo1ERiENK3$_3clEv"(%[[LAMBDACLASS]]* %{{.*}})
+// ARC:   %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>
+// ARC:   %[[CAPTURE1:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %[[BLOCK]], i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE1]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke"
+// ARC:   %[[CAPTURE2:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE2]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke_2"(i8* %{{.*}})
+// ARC:   %[[CAPTURE3:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   %[[V1:.*]] = load i32, i32* %[[CAPTURE3]]
+// ARC:   store i32 %[[V1]], i32* @_ZN13LambdaCapture1iE
+
+namespace LambdaCapture {
+  int i;
+  void foo1(int &a) {
+auto lambda = [a]{
+  auto block1 = ^{
+auto block2 = ^{
+  i = a;
+};
+block2();
+  };
+  block1();
+};
+lambda();
+  }
+}
+
 // ARC-LABEL: define linkonce_odr i32 ()* @_ZZNK13StaticMembersIfE1fMUlvE_clEvENKUlvE_cvU13block_pointerFivEEv
 
 // Check lines for BlockInLambda test below
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth =

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71583.
meikeb added a comment.

Try to drop randomly uploaded commit.


https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager &SM

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-15 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71581.
meikeb added a comment.

Rebase to current commit.


https://reviews.llvm.org/D24584

Files:
  lib/CodeGen/CGBlocks.cpp
  lib/CodeGen/CGBlocks.h
  lib/Sema/SemaChecking.cpp
  test/CodeGenObjCXX/lambda-expressions.mm
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: test/CodeGenObjCXX/lambda-expressions.mm
===
--- test/CodeGenObjCXX/lambda-expressions.mm
+++ test/CodeGenObjCXX/lambda-expressions.mm
@@ -4,6 +4,8 @@
 typedef int (^fp)();
 fp f() { auto x = []{ return 3; }; return x; }
 
+// ARC: %[[LAMBDACLASS:.*]] = type { i32 }
+
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [5 x i8] c"copy\00"
 // MRC: @OBJC_METH_VAR_NAME{{.*}} = private global [12 x i8] c"autorelease\00"
 // MRC-LABEL: define i32 ()* @_Z1fv(
@@ -60,6 +62,40 @@
 }
 @end
 
+// ARC: define void @_ZN13LambdaCapture4foo1ERi(i32* dereferenceable(4) %{{.*}})
+// ARC:   %[[CAPTURE0:.*]] = getelementptr inbounds %[[LAMBDACLASS]], %[[LAMBDACLASS]]* %{{.*}}, i32 0, i32 0
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE0]]
+
+// ARC: define internal void @"_ZZN13LambdaCapture4foo1ERiENK3$_3clEv"(%[[LAMBDACLASS]]* %{{.*}})
+// ARC:   %[[BLOCK:.*]] = alloca <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>
+// ARC:   %[[CAPTURE1:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %[[BLOCK]], i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE1]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke"
+// ARC:   %[[CAPTURE2:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   store i32 %{{.*}}, i32* %[[CAPTURE2]]
+
+// ARC: define internal void @"___ZZN13LambdaCapture4foo1ERiENK3$_3clEv_block_invoke_2"(i8* %{{.*}})
+// ARC:   %[[CAPTURE3:.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, i32 }>* %{{.*}}, i32 0, i32 5
+// ARC:   %[[V1:.*]] = load i32, i32* %[[CAPTURE3]]
+// ARC:   store i32 %[[V1]], i32* @_ZN13LambdaCapture1iE
+
+namespace LambdaCapture {
+  int i;
+  void foo1(int &a) {
+auto lambda = [a]{
+  auto block1 = ^{
+auto block2 = ^{
+  i = a;
+};
+block2();
+  };
+  block1();
+};
+lambda();
+  }
+}
+
 // ARC-LABEL: define linkonce_odr i32 ()* @_ZZNK13StaticMembersIfE1fMUlvE_clEvENKUlvE_cvU13block_pointerFivEEv
 
 // Check lines for BlockInLambda test below
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3850,7 +3850,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb updated this revision to Diff 71438.
meikeb added a comment.

Fix typos in commit message.


https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind(); }
+
+  QualType getType() const { return FExpr->getType(); }
+
+  bool isAscii() const { return FExpr->isAscii(); }
+  bool isWide() const { return FExpr->isWide(); }
+  bool isUTF8() const { return FExpr->isUTF8(); }
+  bool isUTF16() const { return FExpr->isUTF16(); }
+  bool isUTF32() const { return FExpr->isUTF32(); }
+  bool isPascal() const { return FExpr->isPascal(); }
+
+  SourceLocation getLocationOfByte(
+  unsigned ByteNo, const SourceManager &SM, const L

Re: [PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb added a comment.

This is the same as https://reviews.llvm.org/D23820 besides that I added myself 
in the commit message as "Patch by". https://reviews.llvm.org/D23820 was 
reverted in https://reviews.llvm.org/D24579 because srhines' commit took 
authorship of this patch.


https://reviews.llvm.org/D24584



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D24584: Do not warn about format strings that are indexed string literals.

2016-09-14 Thread Meike Baumgärtner via cfe-commits
meikeb created this revision.
meikeb added a reviewer: rsmith.
meikeb added subscribers: cfe-commits, srhines.

The warning for a format string not being a sting literal and therefore
being potentially insecure is overly strict for indecies into sting
literals. This fix checks if the index into the string literal is
precomputable. If thats the case it will check if the suffix of that
sting literal is a valid format string string literal. It will still
issue the aforementioned warning for out of range indecies into the
string literal.

Patch by Meike Baumgärtner (meikeb)

https://reviews.llvm.org/D24584

Files:
  lib/Sema/SemaChecking.cpp
  test/Sema/format-strings.c

Index: test/Sema/format-strings.c
===
--- test/Sema/format-strings.c
+++ test/Sema/format-strings.c
@@ -652,3 +652,38 @@
   // expected-note@-1{{treat the string as an argument to avoid this}}
 }
 #pragma GCC diagnostic warning "-Wformat-nonliteral"
+
+void test_char_pointer_arithmetic(int b) {
+  const char s1[] = "string";
+  const char s2[] = "%s string";
+
+  printf(s1 - 1);  // expected-warning {{format string is not a string literal (potentially insecure)}}
+  // expected-note@-1{{treat the string as an argument to avoid this}}
+
+  printf(s1 + 2);  // no-warning
+  printf(s2 + 2);  // no-warning
+
+  const char s3[] = "%s string";
+  printf((s3 + 2) - 2);  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + s2); // no-warning
+  printf(6 + s2 - 2); // no-warning
+  printf(2 + (b ? s1 : s2));  // no-warning
+
+  const char s5[] = "string %s";
+  printf(2 + (b ? s2 : s5));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(2 + (b ? s2 : s5), "");  // no-warning
+  printf(2 + (b ? s1 : s2 - 2), "");  // no-warning
+
+  const char s6[] = "%s string";
+  printf(2 + (b ? s1 : s6 - 2));  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+  printf(1 ? s2 + 2 : s2);  // no-warning
+  printf(0 ? s2 : s2 + 2);  // no-warning
+  printf(2 + s2 + 5 * 3 - 16, "");  // expected-warning{{data argument not used}}
+
+  const char s7[] = "%s string %s %s";
+  printf(s7 + 3, "");  // expected-warning{{more '%' conversions than data arguments}}
+  // expected-note@-2{{format string is defined here}}
+}
Index: lib/Sema/SemaChecking.cpp
===
--- lib/Sema/SemaChecking.cpp
+++ lib/Sema/SemaChecking.cpp
@@ -3839,7 +3839,95 @@
 };
 } // end anonymous namespace
 
-static void CheckFormatString(Sema &S, const StringLiteral *FExpr,
+static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend,
+ BinaryOperatorKind BinOpKind,
+ bool AddendIsRight) {
+  unsigned BitWidth = Offset.getBitWidth();
+  unsigned AddendBitWidth = Addend.getBitWidth();
+  // There might be negative interim results.
+  if (Addend.isUnsigned()) {
+Addend = Addend.zext(++AddendBitWidth);
+Addend.setIsSigned(true);
+  }
+  // Adjust the bit width of the APSInts.
+  if (AddendBitWidth > BitWidth) {
+Offset = Offset.sext(AddendBitWidth);
+BitWidth = AddendBitWidth;
+  } else if (BitWidth > AddendBitWidth) {
+Addend = Addend.sext(BitWidth);
+  }
+
+  bool Ov = false;
+  llvm::APSInt ResOffset = Offset;
+  if (BinOpKind == BO_Add)
+ResOffset = Offset.sadd_ov(Addend, Ov);
+  else {
+assert(AddendIsRight && BinOpKind == BO_Sub &&
+   "operator must be add or sub with addend on the right");
+ResOffset = Offset.ssub_ov(Addend, Ov);
+  }
+
+  // We add an offset to a pointer here so we should support an offset as big as
+  // possible.
+  if (Ov) {
+assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big");
+Offset.sext(2 * BitWidth);
+sumOffsets(Offset, Addend, BinOpKind, AddendIsRight);
+return;
+  }
+
+  Offset = ResOffset;
+}
+
+namespace {
+// This is a wrapper class around StringLiteral to support offsetted string
+// literals as format strings. It takes the offset into account when returning
+// the string and its length or the source locations to display notes correctly.
+class FormatStringLiteral {
+  const StringLiteral *FExpr;
+  int64_t Offset;
+
+ public:
+  FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0)
+  : FExpr(fexpr), Offset(Offset) {}
+
+  StringRef getString() const {
+return FExpr->getString().drop_front(Offset);
+  }
+
+  unsigned getByteLength() const {
+return FExpr->getByteLength() - getCharByteWidth() * Offset;
+  }
+  unsigned getLength() const { return FExpr->getLength() - Offset; }
+  unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); }
+
+  StringLiteral::StringKind getKind() const { return FExpr->getKind