Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
This revision was automatically updated to reflect the committed changes. Closed by commit rL281527: Do not warn about format strings that are indexed string literals. (authored by srhines). Changed prior to commit: https://reviews.llvm.org/D23820?vs=71286&id=71416#toc Repository: rL LLVM https://reviews.llvm.org/D23820 Files: cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/Sema/format-strings.c Index: cfe/trunk/test/Sema/format-strings.c === --- cfe/trunk/test/Sema/format-strings.c +++ cfe/trunk/test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} Index: cfe/trunk/lib/Sema/SemaChecking.cpp === --- cfe/trunk/lib/Sema/SemaChecking.cpp +++ cfe/trunk/lib/Sema/SemaChecking.cpp @@ -3841,7 +3841,95 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { +Addend = Addend.zext(++AddendBitWidth); +Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { +Offset = Offset.sext(AddendBitWidth); +BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { +Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) +ResOffset = Offset.sadd_ov(Addend, Ov); + else { +assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); +ResOffset = Offset.ssub_ov(Addend, Ov); + } + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { +assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); +Offset.sext(2 * BitWidth); +sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); +return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { +return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { +return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { retu
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
srhines added a comment. I will take care of submitting this. Thanks for the reviews everybody! https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb marked an inline comment as done. meikeb added a comment. Thanks for reviewing my patch! It would be great if someone could submit this patch for me. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb marked an inline comment as done. Comment at: lib/Sema/SemaChecking.cpp:3864-3867 @@ +3863,6 @@ +ResOffset = Offset.sadd_ov(Addend, Ov); + else { +assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); +ResOffset = Offset.ssub_ov(Addend, Ov); + } This is way better. Thank you. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb updated this revision to Diff 71286. meikeb added a comment. Correct assert position in offset sum helper function. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3839,7 +3839,95 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { +Addend = Addend.zext(++AddendBitWidth); +Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { +Offset = Offset.sext(AddendBitWidth); +BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { +Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) +ResOffset = Offset.sadd_ov(Addend, Ov); + else { +assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); +ResOffset = Offset.ssub_ov(Addend, Ov); + } + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { +assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); +Offset.sext(2 * BitWidth); +sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); +return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { +return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { +return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceLocation getLocationOfByte( + unsigned ByteNo, const
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb marked 2 inline comments as done. meikeb added a comment. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb updated this revision to Diff 71284. meikeb added a comment. Try to improve offset sum helper function name and fix style issues. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3839,7 +3839,95 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { +Addend = Addend.zext(++AddendBitWidth); +Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { +Offset = Offset.sext(AddendBitWidth); +BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { +Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) +ResOffset = Offset.sadd_ov(Addend, Ov); + else if (AddendIsRight && BinOpKind == BO_Sub) +ResOffset = Offset.ssub_ov(Addend, Ov); + else +assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { +assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); +Offset.sext(2 * BitWidth); +sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); +return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { +return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { +return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceL
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
rsmith added inline comments. Comment at: lib/Sema/SemaChecking.cpp:3864-3867 @@ +3863,6 @@ +ResOffset = Offset.sadd_ov(Addend, Ov); + else if (AddendIsRight && BinOpKind == BO_Sub) +ResOffset = Offset.ssub_ov(Addend, Ov); + else +assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); The suggestion was to remove the condition in the `else if` and put the assertion inside its body, rather than duplicating it here: if (BinOpKind == BO_Add) // handle add else { assert(it's a subtract); // handle sub } https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
rsmith accepted this revision. rsmith added a comment. This revision is now accepted and ready to land. This basically looks fine to me now. I'm not 100% sold on `sumUpStringLiteralOffset` being the best name, but I think we have better things to worry about, and it's good enough. Just a couple of minor style issues then I think this is good to go. Comment at: lib/Sema/SemaChecking.cpp:3866-3867 @@ +3865,4 @@ +ResOffset = Offset.ssub_ov(Addend, Ov); + else +assert(false && "operator must be add or sub with addend on the right"); + Rather than `assert(false && XXX);`, use either `llvm_unreachable(XXX)` or change the previous case to be: else { assert(AddendIsRight && BinOpKind == BO_Sub && "operator must be ..."); Comment at: lib/Sema/SemaChecking.cpp:4150-4151 @@ +4149,4 @@ + } + FormatStringLiteral FStr = + FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue()); + CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, You can write this more simply as FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb added a comment. I explained why I chose the names that you commented on. Feel free to add your thoughts if you still think another name would be more fitting. Comment at: lib/Sema/SemaChecking.cpp:3842 @@ -3841,2 +3841,3 @@ -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumUpStringLiteralOffset(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, srhines wrote: > Is "computeStringLiteralOffset" or "calculate..." a better name here? I thought about that but decided to go with sumUp because compute or calculate sounds like this function would do what we actually do what the caller of this function does (computing the offset). This is just a nice helper to sum up the offset we already have with another piece of offset. Comment at: lib/Sema/SemaChecking.cpp:3844 @@ +3843,3 @@ + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); srhines wrote: > Is "Operand" better than "Addend"? In particular, there is the possibility > that we do subtraction of the value instead of addition, so "Addend" makes it > a bit confusing. Of course, I then would expect "OperandIsRight" instead of > "AddendIsRight" too. Clang summarizes sub and add as "additive" operands. This is why I think this is fitting. Operand is misleading because it includes a lot more operands than add and sub imo. Comment at: lib/Sema/SemaChecking.cpp:3860 @@ +3859,3 @@ + + bool Ov = false; + llvm::APSInt ResOffset = Offset; srhines wrote: > Ov -> Overflow I named that in compliance with clang naming. E.g. sadd_ov. It is common in this file to abbreviate variable names with 1-3 characters. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb updated this revision to Diff 71199. meikeb marked 7 inline comments as done. meikeb added a comment. Fix typos and add assert to sum up offset helper. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3839,7 +3839,94 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumUpStringLiteralOffset(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { +Addend = Addend.zext(++AddendBitWidth); +Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { +Offset = Offset.sext(AddendBitWidth); +BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { +Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) +ResOffset = Offset.sadd_ov(Addend, Ov); + else if (AddendIsRight && BinOpKind == BO_Sub) +ResOffset = Offset.ssub_ov(Addend, Ov); + else +assert(false && "operator must be add or sub with addend on the right"); + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { +assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); +Offset.sext(2 * BitWidth); +sumUpStringLiteralOffset(Offset, Addend, BinOpKind, AddendIsRight); +return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { +return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { +return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + +
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
srhines added a comment. My comment is mostly naming considerations to improve clarity. I do have concerns though about the unhandled else path. Comment at: lib/Sema/SemaChecking.cpp:3842 @@ -3841,2 +3841,3 @@ -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumUpStringLiteralOffset(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, Is "computeStringLiteralOffset" or "calculate..." a better name here? Comment at: lib/Sema/SemaChecking.cpp:3844 @@ +3843,3 @@ + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); Is "Operand" better than "Addend"? In particular, there is the possibility that we do subtraction of the value instead of addition, so "Addend" makes it a bit confusing. Of course, I then would expect "OperandIsRight" instead of "AddendIsRight" too. Comment at: lib/Sema/SemaChecking.cpp:3852 @@ +3851,3 @@ + } + // Align the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { Align -> Canonicalize or Adjust This is confusing here as "Align" already has too many overloaded meanings in programming (that could be relevant to bitwidths). Canonicalize or Adjust don't have this problem. Comment at: lib/Sema/SemaChecking.cpp:3860 @@ +3859,3 @@ + + bool Ov = false; + llvm::APSInt ResOffset = Offset; Ov -> Overflow Comment at: lib/Sema/SemaChecking.cpp:3865 @@ +3864,3 @@ + else if (AddendIsRight && BinOpKind == BO_Sub) +ResOffset = Offset.ssub_ov(Addend, Ov); + What happens if someone passes something that isn't caught by these two cases? Should we be returning an indicator that the calculation failed? If not, should we assert here? Comment at: lib/Sema/SemaChecking.cpp:3867 @@ +3866,3 @@ + + // We add a offset to a pointer here so we should support a offset as big as + // possible. 2 places to fix: "a offset" -> "an offset" https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb marked 6 inline comments as done. meikeb added a comment. Thanks for taking the time and doing these great reviews! Really appreciated! Comment at: lib/Sema/SemaChecking.cpp:4143-4150 @@ -4049,3 +4142,10 @@ if (StrE) { - CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, + if (Offset.isNegative() || Offset > StrE->getLength()) { +// TODO: It would be better to have an explicit warning for out of +// bounds literals. +return SLCT_NotALiteral; + } + FormatStringLiteral FStr = + FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue()); + CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, The = case is part of a different warning. It's checked in CheckFormatString. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb updated this revision to Diff 71090. meikeb marked an inline comment as done. meikeb added a comment. Fix mentioned issues. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3839,7 +3839,92 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumUpStringLiteralOffset(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { +Addend = Addend.zext(++AddendBitWidth); +Addend.setIsSigned(true); + } + // Align the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { +Offset = Offset.sext(AddendBitWidth); +BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { +Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) +ResOffset = Offset.sadd_ov(Addend, Ov); + else if (AddendIsRight && BinOpKind == BO_Sub) +ResOffset = Offset.ssub_ov(Addend, Ov); + + // We add a offset to a pointer here so we should support a offset as big as + // possible. + if (Ov) { +assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); +Offset.sext(2 * BitWidth); +sumUpStringLiteralOffset(Offset, Addend, BinOpKind, AddendIsRight); +return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { +return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { +return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceLocation getLocationOfByte( + unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, +
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
rsmith added inline comments. Comment at: lib/Sema/SemaChecking.cpp:3842-3843 @@ -3841,2 +3841,4 @@ -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void reckonUpOffset(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); I can't tell from this declaration what this function is for -- what does "reckon up" mean? Comment at: lib/Sema/SemaChecking.cpp:3880 @@ +3879,3 @@ +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when retruning +// the string and its length or the source locations to display notes correctly. Typo "retruning" Comment at: lib/Sema/SemaChecking.cpp:3891-3892 @@ +3890,4 @@ + StringRef getString() const { +return StringRef(FExpr->getString().data() + Offset, + FExpr->getLength() - Offset); + } I think you can simplify this to `return FExpr->getString().drop_front(Offset);` Comment at: lib/Sema/SemaChecking.cpp:4143 @@ -4049,2 +4142,3 @@ if (StrE) { - CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, + if (Offset.isNegative()) { +// TODO: It would be better to have an explicit warning for out of You should presumably also do this if `Offset` is >= the length of the string literal (we want `printf` and friends to at least find the trailing nul byte). Comment at: lib/Sema/SemaChecking.cpp:4148-4149 @@ +4147,4 @@ + } + FormatStringLiteral *FStr = + new FormatStringLiteral(StrE, Offset.sextOrTrunc(64).getSExtValue()); + CheckFormatString(S, FStr, E, Args, HasVAListArg, format_idx, Does this need to be heap-allocated? Comment at: lib/Sema/SemaChecking.cpp:4166-4167 @@ +4165,4 @@ +if (BinOp->isAdditiveOp()) { + bool LIsInt = false; + bool RIsInt = false; + // Prevent asserts triggering in EvaluateAsInt by checking if we deal with meikeb wrote: > I hope this additional check fixes this issue. As far as I read the code > there were none such asserts in isIntegerConstantExpr(). Thanks for > explaining this! OK, I've now checked and the value-dependent case is handled up on line 3953. So just calling `EvaluateAsInt` here is fine after all. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb marked 4 inline comments as done. Comment at: lib/Sema/SemaChecking.cpp:4166-4167 @@ +4165,4 @@ +if (BinOp->isAdditiveOp()) { + bool LIsInt = false; + bool RIsInt = false; + // Prevent asserts triggering in EvaluateAsInt by checking if we deal with I hope this additional check fixes this issue. As far as I read the code there were none such asserts in isIntegerConstantExpr(). Thanks for explaining this! https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb updated this revision to Diff 71043. meikeb added a comment. Fix the mentioned issues. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,38 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3839,7 +3839,92 @@ }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void reckonUpOffset(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { +Addend = Addend.zext(++AddendBitWidth); +Addend.setIsSigned(true); + } + // Align the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { +Offset = Offset.sext(AddendBitWidth); +BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { +Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) +ResOffset = Offset.sadd_ov(Addend, Ov); + else if (AddendIsRight && BinOpKind == BO_Sub) +ResOffset = Offset.ssub_ov(Addend, Ov); + + // We add a offset to a pointer here so we should support a offset as big as + // possible. + if (Ov) { +assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); +Offset.sext(2 * BitWidth); +reckonUpOffset(Offset, Addend, BinOpKind, AddendIsRight); +return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when retruning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { +return StringRef(FExpr->getString().data() + Offset, + FExpr->getLength() - Offset); + } + + unsigned getByteLength() const { +return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceLocation getLocationOfByte( + unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, + const TargetInfo &Target, unsigned *StartTok
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
rsmith added inline comments. Comment at: lib/Sema/SemaChecking.cpp:4089-4090 @@ +4088,4 @@ +if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + meikeb wrote: > rsmith wrote: > > What happens if one of these expressions is value-dependent? The evaluator > > can crash or assert if given a value-dependent expression. If we don't > > defer these checks in dependent contexts, you'll need to handle that > > possibility somehow. > > > > Example: > > > > template void f(const char *p) { > > printf("blah blah %s" + N, p); > > } > I think I don't understand what you are trying to tell me. Especially the > example you provided does just fine and behaves as I expected. As far as I > followed EvaluateAsInt it does not assert but returns false if we don't get a > constexpr here. We warn under -Wformat-nonliteral for value-dependent string > literals. > > Could you explain this more or provide an example that triggers an assert or > explain what behavior is wrong regarding the provided example? Thanks! We should not warn for that example, since (for instance) calling `f<0>` is fine (we should warn for `f<11>`, though, since it has no format specifiers). While `EvaluateAsInt` happens to not assert for that particular value-dependent input, it does assert for some other value-dependent cases. It's not easy for me to find you such a case, because Clang is currently careful to never call this function on a value-dependent expression, but perhaps this will trigger an assert: struct S { constexpr S(int n) : n(n) {} int n; }; template void f(const char *p) { printf("blah blah %s" + S(N).n, p); } https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb added inline comments. Comment at: lib/Sema/SemaChecking.cpp:4089-4090 @@ +4088,4 @@ +if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + rsmith wrote: > What happens if one of these expressions is value-dependent? The evaluator > can crash or assert if given a value-dependent expression. If we don't defer > these checks in dependent contexts, you'll need to handle that possibility > somehow. > > Example: > > template void f(const char *p) { > printf("blah blah %s" + N, p); > } I think I don't understand what you are trying to tell me. Especially the example you provided does just fine and behaves as I expected. As far as I followed EvaluateAsInt it does not assert but returns false if we don't get a constexpr here. We warn under -Wformat-nonliteral for value-dependent string literals. Could you explain this more or provide an example that triggers an assert or explain what behavior is wrong regarding the provided example? Thanks! https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
rsmith added a comment. Please also handle expressions of the form `&"str"[i]`. We warn (under `-Wstring-plus-int`) for `"str" + i` and suggest rewriting into that form, so we should also handle that form here. Comment at: lib/Sema/SemaChecking.cpp:3864 @@ -3864,1 +3863,3 @@ + UncoveredArgHandler &UncoveredArg, + int64_t &Offset) { tryAgain: Why is this passed by reference? It's just an input, not an output, right? Comment at: lib/Sema/SemaChecking.cpp:4062-4067 @@ +4061,8 @@ +// into the original string literal. +StrE = StringLiteral::Create(S.Context, + StrE->getString().drop_front(Offset), + StrE->getKind(), + StrE->isPascal(), + StrE->getType(), + StrE->getLocStart()); + } else if (Offset < 0) { This doesn't seem like it preserves enough information for the downstream code to give correct caret diagnostics pointing at locations within the string. It seems like it would be extremely complicated to maintain the necessary invariants to make that work (you'd need to create a fake string literal source buffer so that the `StringLiteralParser` can reparse it, for whichever of the string literal tokens the offset ends up within). Have you looked at how much work it'd be to feed a starting offset into `CheckFormatString` instead? Comment at: lib/Sema/SemaChecking.cpp:4089-4090 @@ +4088,4 @@ +if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + What happens if one of these expressions is value-dependent? The evaluator can crash or assert if given a value-dependent expression. If we don't defer these checks in dependent contexts, you'll need to handle that possibility somehow. Example: template void f(const char *p) { printf("blah blah %s" + N, p); } Comment at: lib/Sema/SemaChecking.cpp:4097-4100 @@ +4096,6 @@ + if (LIsInt) { +Offset += LResult.getExtValue(); +E = BinOp->getRHS(); + } else { +Offset += RResult.getExtValue(); +E = BinOp->getLHS(); This will assert if the result doesn't fit into 64 bits, and it's not guaranteed to (if one of the operands was cast to `__int128`, for instance). You could use `getLimitedValue` instead, with some suitable limit. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb updated this revision to Diff 69057. meikeb added a comment. Fix typo. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,34 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2);// no-warning + printf(6 + s2 - 2);// no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3860,7 +3860,8 @@ unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg) { + UncoveredArgHandler &UncoveredArg, + int64_t &Offset) { tryAgain: if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3895,23 +3896,31 @@ CheckLeft = false; } +// We need to maintain the offsets for the right and the left hand side +// separately to check if every possible indexed expression is a valid +// string literal. They might have different offsets for different string +// literals in the end. +int64_t LOffset = Offset; StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; else { Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); - if (Left == SLCT_NotALiteral || !CheckRight) + CheckedVarArgs, UncoveredArg, LOffset); + if (Left == SLCT_NotALiteral || !CheckRight) { +Offset = LOffset; return Left; + } } +int64_t ROffset = Offset; StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, ROffset); return (CheckLeft && Left < Right) ? Left : Right; } @@ -3964,8 +3973,8 @@ return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs, - UncoveredArg); + /*InFunctionCall*/ false, CheckedVarArgs, + UncoveredArg, Offset); } } @@ -4020,7 +4029,7 @@ return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); + CheckedVarArgs, UncoveredArg, Offset); } else if (const FunctionDecl *FD = dyn_cast(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -4030,7 +4039,7 @@
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb marked an inline comment as done. Comment at: lib/Sema/SemaChecking.cpp:3856 @@ -3855,3 +3855,3 @@ // format string, we will usually need to emit a warning. // True string literals are then checked by CheckFormatString. static StringLiteralCheckType I'm not sure if that should be mentioned here because it is a very high level comment and the suffix of a string literal is a string literal itself. https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
Re: [PATCH] D23820: Do not warn about format strings that are indexed string literals.
srhines added inline comments. Comment at: lib/Sema/SemaChecking.cpp:3856 @@ -3855,3 +3855,3 @@ // format string, we will usually need to emit a warning. // True string literals are then checked by CheckFormatString. static StringLiteralCheckType It might be good to mention that Offset now goes back to the caller to allow for checking of string literal suffixes. Comment at: lib/Sema/SemaChecking.cpp:3900 @@ +3899,3 @@ +// We need to maintain the offsets for the right and the left hand side +// seperately to check if every possible indexed expression is a valid +// string literal. They might have different offsets for different string separately https://reviews.llvm.org/D23820 ___ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D23820: Do not warn about format strings that are indexed string literals.
meikeb created this revision. meikeb added a reviewer: rsmith. meikeb added a subscriber: cfe-commits. The warning for a format string not being a sting literal and therefore being potentially insecure is overly strict for indecies into sting literals. This fix checks if the index into the string literal is precomputable. If thats the case it will check if the suffix of that sting literal is a valid format string string literal. It will still issue the aforementioned warning for out of range indecies into the string literal. https://reviews.llvm.org/D23820 Files: lib/Sema/SemaChecking.cpp test/Sema/format-strings.c Index: test/Sema/format-strings.c === --- test/Sema/format-strings.c +++ test/Sema/format-strings.c @@ -652,3 +652,34 @@ // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2);// no-warning + printf(6 + s2 - 2);// no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} +} Index: lib/Sema/SemaChecking.cpp === --- lib/Sema/SemaChecking.cpp +++ lib/Sema/SemaChecking.cpp @@ -3860,7 +3860,8 @@ unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg) { + UncoveredArgHandler &UncoveredArg, + int64_t &Offset) { tryAgain: if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3895,23 +3896,31 @@ CheckLeft = false; } +// We need to maintain the offsets for the right and the left hand side +// seperately to check if every possible indexed expression is a valid +// string literal. They might have different offsets for different string +// literals in the end. +int64_t LOffset = Offset; StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; else { Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); - if (Left == SLCT_NotALiteral || !CheckRight) + CheckedVarArgs, UncoveredArg, LOffset); + if (Left == SLCT_NotALiteral || !CheckRight) { +Offset = LOffset; return Left; + } } +int64_t ROffset = Offset; StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, ROffset); return (CheckLeft && Left < Right) ? Left : Right; } @@ -3964,8 +3973,8 @@ return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs, - UncoveredArg); + /*InFunctionCall*/ false, CheckedVarArgs, + UncoveredArg, Offset); } } @@ -4020,7 +4029,7 @@ return checkFormatStringExpr(S, Arg, Args, HasVAListArg, forma