Author: srhines Date: Wed Sep 14 15:05:20 2016 New Revision: 281527 URL: http://llvm.org/viewvc/llvm-project?rev=281527&view=rev Log: Do not warn about format strings that are indexed string literals.
Summary: The warning for a format string not being a sting literal and therefore being potentially insecure is overly strict for indecies into sting literals. This fix checks if the index into the string literal is precomputable. If thats the case it will check if the suffix of that sting literal is a valid format string string literal. It will still issue the aforementioned warning for out of range indecies into the string literal. Reviewers: rsmith Subscribers: srhines, cfe-commits Differential Revision: https://reviews.llvm.org/D23820 Modified: cfe/trunk/lib/Sema/SemaChecking.cpp cfe/trunk/test/Sema/format-strings.c Modified: cfe/trunk/lib/Sema/SemaChecking.cpp URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=281527&r1=281526&r2=281527&view=diff ============================================================================== --- cfe/trunk/lib/Sema/SemaChecking.cpp (original) +++ cfe/trunk/lib/Sema/SemaChecking.cpp Wed Sep 14 15:05:20 2016 @@ -3841,7 +3841,95 @@ enum StringLiteralCheckType { }; } // end anonymous namespace -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void sumOffsets(llvm::APSInt &Offset, llvm::APSInt Addend, + BinaryOperatorKind BinOpKind, + bool AddendIsRight) { + unsigned BitWidth = Offset.getBitWidth(); + unsigned AddendBitWidth = Addend.getBitWidth(); + // There might be negative interim results. + if (Addend.isUnsigned()) { + Addend = Addend.zext(++AddendBitWidth); + Addend.setIsSigned(true); + } + // Adjust the bit width of the APSInts. + if (AddendBitWidth > BitWidth) { + Offset = Offset.sext(AddendBitWidth); + BitWidth = AddendBitWidth; + } else if (BitWidth > AddendBitWidth) { + Addend = Addend.sext(BitWidth); + } + + bool Ov = false; + llvm::APSInt ResOffset = Offset; + if (BinOpKind == BO_Add) + ResOffset = Offset.sadd_ov(Addend, Ov); + else { + assert(AddendIsRight && BinOpKind == BO_Sub && + "operator must be add or sub with addend on the right"); + ResOffset = Offset.ssub_ov(Addend, Ov); + } + + // We add an offset to a pointer here so we should support an offset as big as + // possible. + if (Ov) { + assert(BitWidth <= UINT_MAX / 2 && "index (intermediate) result too big"); + Offset.sext(2 * BitWidth); + sumOffsets(Offset, Addend, BinOpKind, AddendIsRight); + return; + } + + Offset = ResOffset; +} + +namespace { +// This is a wrapper class around StringLiteral to support offsetted string +// literals as format strings. It takes the offset into account when returning +// the string and its length or the source locations to display notes correctly. +class FormatStringLiteral { + const StringLiteral *FExpr; + int64_t Offset; + + public: + FormatStringLiteral(const StringLiteral *fexpr, int64_t Offset = 0) + : FExpr(fexpr), Offset(Offset) {} + + StringRef getString() const { + return FExpr->getString().drop_front(Offset); + } + + unsigned getByteLength() const { + return FExpr->getByteLength() - getCharByteWidth() * Offset; + } + unsigned getLength() const { return FExpr->getLength() - Offset; } + unsigned getCharByteWidth() const { return FExpr->getCharByteWidth(); } + + StringLiteral::StringKind getKind() const { return FExpr->getKind(); } + + QualType getType() const { return FExpr->getType(); } + + bool isAscii() const { return FExpr->isAscii(); } + bool isWide() const { return FExpr->isWide(); } + bool isUTF8() const { return FExpr->isUTF8(); } + bool isUTF16() const { return FExpr->isUTF16(); } + bool isUTF32() const { return FExpr->isUTF32(); } + bool isPascal() const { return FExpr->isPascal(); } + + SourceLocation getLocationOfByte( + unsigned ByteNo, const SourceManager &SM, const LangOptions &Features, + const TargetInfo &Target, unsigned *StartToken = nullptr, + unsigned *StartTokenByteOffset = nullptr) const { + return FExpr->getLocationOfByte(ByteNo + Offset, SM, Features, Target, + StartToken, StartTokenByteOffset); + } + + SourceLocation getLocStart() const LLVM_READONLY { + return FExpr->getLocStart().getLocWithOffset(Offset); + } + SourceLocation getLocEnd() const LLVM_READONLY { return FExpr->getLocEnd(); } +}; +} // end anonymous namespace + +static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef<const Expr *> Args, bool HasVAListArg, unsigned format_idx, @@ -3862,8 +3950,11 @@ checkFormatStringExpr(Sema &S, const Exp unsigned firstDataArg, Sema::FormatStringType Type, Sema::VariadicCallType CallType, bool InFunctionCall, llvm::SmallBitVector &CheckedVarArgs, - UncoveredArgHandler &UncoveredArg) { + UncoveredArgHandler &UncoveredArg, + llvm::APSInt Offset) { tryAgain: + assert(Offset.isSigned() && "invalid offset"); + if (E->isTypeDependent() || E->isValueDependent()) return SLCT_NotALiteral; @@ -3897,6 +3988,10 @@ checkFormatStringExpr(Sema &S, const Exp CheckLeft = false; } + // We need to maintain the offsets for the right and the left hand side + // separately to check if every possible indexed expression is a valid + // string literal. They might have different offsets for different string + // literals in the end. StringLiteralCheckType Left; if (!CheckLeft) Left = SLCT_UncheckedLiteral; @@ -3904,16 +3999,17 @@ checkFormatStringExpr(Sema &S, const Exp Left = checkFormatStringExpr(S, C->getTrueExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); - if (Left == SLCT_NotALiteral || !CheckRight) + CheckedVarArgs, UncoveredArg, Offset); + if (Left == SLCT_NotALiteral || !CheckRight) { return Left; + } } StringLiteralCheckType Right = checkFormatStringExpr(S, C->getFalseExpr(), Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); return (CheckLeft && Left < Right) ? Left : Right; } @@ -3966,8 +4062,8 @@ checkFormatStringExpr(Sema &S, const Exp return checkFormatStringExpr(S, Init, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*InFunctionCall*/false, CheckedVarArgs, - UncoveredArg); + /*InFunctionCall*/ false, CheckedVarArgs, + UncoveredArg, Offset); } } @@ -4022,7 +4118,7 @@ checkFormatStringExpr(Sema &S, const Exp return checkFormatStringExpr(S, Arg, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, - CheckedVarArgs, UncoveredArg); + CheckedVarArgs, UncoveredArg, Offset); } else if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(ND)) { unsigned BuiltinID = FD->getBuiltinID(); if (BuiltinID == Builtin::BI__builtin___CFStringMakeConstantString || @@ -4032,7 +4128,7 @@ checkFormatStringExpr(Sema &S, const Exp HasVAListArg, format_idx, firstDataArg, Type, CallType, InFunctionCall, CheckedVarArgs, - UncoveredArg); + UncoveredArg, Offset); } } } @@ -4049,7 +4145,13 @@ checkFormatStringExpr(Sema &S, const Exp StrE = cast<StringLiteral>(E); if (StrE) { - CheckFormatString(S, StrE, E, Args, HasVAListArg, format_idx, + if (Offset.isNegative() || Offset > StrE->getLength()) { + // TODO: It would be better to have an explicit warning for out of + // bounds literals. + return SLCT_NotALiteral; + } + FormatStringLiteral FStr(StrE, Offset.sextOrTrunc(64).getSExtValue()); + CheckFormatString(S, &FStr, E, Args, HasVAListArg, format_idx, firstDataArg, Type, InFunctionCall, CallType, CheckedVarArgs, UncoveredArg); return SLCT_CheckedLiteral; @@ -4057,6 +4159,50 @@ checkFormatStringExpr(Sema &S, const Exp return SLCT_NotALiteral; } + case Stmt::BinaryOperatorClass: { + llvm::APSInt LResult; + llvm::APSInt RResult; + + const BinaryOperator *BinOp = cast<BinaryOperator>(E); + + // A string literal + an int offset is still a string literal. + if (BinOp->isAdditiveOp()) { + bool LIsInt = BinOp->getLHS()->EvaluateAsInt(LResult, S.Context); + bool RIsInt = BinOp->getRHS()->EvaluateAsInt(RResult, S.Context); + + if (LIsInt != RIsInt) { + BinaryOperatorKind BinOpKind = BinOp->getOpcode(); + + if (LIsInt) { + if (BinOpKind == BO_Add) { + sumOffsets(Offset, LResult, BinOpKind, RIsInt); + E = BinOp->getRHS(); + goto tryAgain; + } + } else { + sumOffsets(Offset, RResult, BinOpKind, RIsInt); + E = BinOp->getLHS(); + goto tryAgain; + } + } + + return SLCT_NotALiteral; + } + } + case Stmt::UnaryOperatorClass: { + const UnaryOperator *UnaOp = cast<UnaryOperator>(E); + auto ASE = dyn_cast<ArraySubscriptExpr>(UnaOp->getSubExpr()); + if (UnaOp->getOpcode() == clang::UO_AddrOf && ASE) { + llvm::APSInt IndexResult; + if (ASE->getRHS()->EvaluateAsInt(IndexResult, S.Context)) { + sumOffsets(Offset, IndexResult, BO_Add, /*RHS is int*/ true); + E = ASE->getBase(); + goto tryAgain; + } + } + + return SLCT_NotALiteral; + } default: return SLCT_NotALiteral; @@ -4123,8 +4269,9 @@ bool Sema::CheckFormatArguments(ArrayRef StringLiteralCheckType CT = checkFormatStringExpr(*this, OrigFormatExpr, Args, HasVAListArg, format_idx, firstDataArg, Type, CallType, - /*IsFunctionCall*/true, CheckedVarArgs, - UncoveredArg); + /*IsFunctionCall*/ true, CheckedVarArgs, + UncoveredArg, + /*no string offset*/ llvm::APSInt(64, false) = 0); // Generate a diagnostic where an uncovered argument is detected. if (UncoveredArg.hasUncoveredArg()) { @@ -4180,7 +4327,7 @@ namespace { class CheckFormatHandler : public analyze_format_string::FormatStringHandler { protected: Sema &S; - const StringLiteral *FExpr; + const FormatStringLiteral *FExpr; const Expr *OrigFormatExpr; const unsigned FirstDataArg; const unsigned NumDataArgs; @@ -4197,7 +4344,7 @@ protected: UncoveredArgHandler &UncoveredArg; public: - CheckFormatHandler(Sema &s, const StringLiteral *fexpr, + CheckFormatHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef<const Expr *> Args, @@ -4297,7 +4444,8 @@ getSpecifierRange(const char *startSpeci } SourceLocation CheckFormatHandler::getLocationOfByte(const char *x) { - return S.getLocationOfStringLiteralByte(FExpr, x - Beg); + return FExpr->getLocationOfByte(x - Beg, S.getSourceManager(), + S.getLangOpts(), S.Context.getTargetInfo()); } void CheckFormatHandler::HandleIncompleteSpecifier(const char *startSpecifier, @@ -4636,7 +4784,7 @@ class CheckPrintfHandler : public CheckF bool ObjCContext; public: - CheckPrintfHandler(Sema &s, const StringLiteral *fexpr, + CheckPrintfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, bool isObjC, const char *beg, bool hasVAListArg, @@ -5423,7 +5571,7 @@ CheckPrintfHandler::checkFormatExpr(cons namespace { class CheckScanfHandler : public CheckFormatHandler { public: - CheckScanfHandler(Sema &s, const StringLiteral *fexpr, + CheckScanfHandler(Sema &s, const FormatStringLiteral *fexpr, const Expr *origFormatExpr, unsigned firstDataArg, unsigned numDataArgs, const char *beg, bool hasVAListArg, ArrayRef<const Expr *> Args, @@ -5594,7 +5742,7 @@ bool CheckScanfHandler::HandleScanfSpeci return true; } -static void CheckFormatString(Sema &S, const StringLiteral *FExpr, +static void CheckFormatString(Sema &S, const FormatStringLiteral *FExpr, const Expr *OrigFormatExpr, ArrayRef<const Expr *> Args, bool HasVAListArg, unsigned format_idx, Modified: cfe/trunk/test/Sema/format-strings.c URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/Sema/format-strings.c?rev=281527&r1=281526&r2=281527&view=diff ============================================================================== --- cfe/trunk/test/Sema/format-strings.c (original) +++ cfe/trunk/test/Sema/format-strings.c Wed Sep 14 15:05:20 2016 @@ -652,3 +652,38 @@ void test_format_security_pos(char* stri // expected-note@-1{{treat the string as an argument to avoid this}} } #pragma GCC diagnostic warning "-Wformat-nonliteral" + +void test_char_pointer_arithmetic(int b) { + const char s1[] = "string"; + const char s2[] = "%s string"; + + printf(s1 - 1); // expected-warning {{format string is not a string literal (potentially insecure)}} + // expected-note@-1{{treat the string as an argument to avoid this}} + + printf(s1 + 2); // no-warning + printf(s2 + 2); // no-warning + + const char s3[] = "%s string"; + printf((s3 + 2) - 2); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + s2); // no-warning + printf(6 + s2 - 2); // no-warning + printf(2 + (b ? s1 : s2)); // no-warning + + const char s5[] = "string %s"; + printf(2 + (b ? s2 : s5)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(2 + (b ? s2 : s5), ""); // no-warning + printf(2 + (b ? s1 : s2 - 2), ""); // no-warning + + const char s6[] = "%s string"; + printf(2 + (b ? s1 : s6 - 2)); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} + printf(1 ? s2 + 2 : s2); // no-warning + printf(0 ? s2 : s2 + 2); // no-warning + printf(2 + s2 + 5 * 3 - 16, ""); // expected-warning{{data argument not used}} + + const char s7[] = "%s string %s %s"; + printf(s7 + 3, ""); // expected-warning{{more '%' conversions than data arguments}} + // expected-note@-2{{format string is defined here}} +} _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits