boga95 updated this revision to Diff 178351. boga95 added a comment. Uploaded the diff with full context.
CHANGES SINCE LAST ACTION https://reviews.llvm.org/D55734/new/ https://reviews.llvm.org/D55734 Files: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp
Index: lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp =================================================================== --- lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp +++ lib/StaticAnalyzer/Checkers/GenericTaintChecker.cpp @@ -23,6 +23,8 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include <climits> +#include <initializer_list> +#include <utility> using namespace clang; using namespace ento; @@ -69,8 +71,8 @@ static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg); /// Functions defining the attack surface. - typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *, - CheckerContext &C) const; + using FnCheck = ProgramStateRef (GenericTaintChecker::*)( + const CallExpr *, CheckerContext &C) const; ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const; ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const; ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const; @@ -100,7 +102,7 @@ bool generateReportIfTainted(const Expr *E, const char Msg[], CheckerContext &C) const; - typedef SmallVector<unsigned, 2> ArgVector; + using ArgVector = SmallVector<unsigned, 2>; /// A struct used to specify taint propagation rules for a function. /// @@ -112,30 +114,27 @@ /// ReturnValueIndex is added to the dst list, the return value will be /// tainted. struct TaintPropagationRule { + enum class VariadicType { None, Src, Dst }; + /// List of arguments which can be taint sources and should be checked. ArgVector SrcArgs; /// List of arguments which should be tainted on function return. ArgVector DstArgs; - // TODO: Check if using other data structures would be more optimal. 
- - TaintPropagationRule() {} - - TaintPropagationRule(unsigned SArg, - unsigned DArg, bool TaintRet = false) { - SrcArgs.push_back(SArg); - DstArgs.push_back(DArg); - if (TaintRet) - DstArgs.push_back(ReturnValueIndex); - } - - TaintPropagationRule(unsigned SArg1, unsigned SArg2, - unsigned DArg, bool TaintRet = false) { - SrcArgs.push_back(SArg1); - SrcArgs.push_back(SArg2); - DstArgs.push_back(DArg); - if (TaintRet) - DstArgs.push_back(ReturnValueIndex); - } + /// Index of the first variadic parameter, if one exists. + unsigned VariadicIndex; + /// Show when a function has variadic parameters. If it does, it marks all + /// of them as source or destination. + VariadicType VarType; + + TaintPropagationRule() + : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None) {} + + TaintPropagationRule(std::initializer_list<unsigned> &&Src, + std::initializer_list<unsigned> &&Dst, + VariadicType Var = VariadicType::None, + unsigned VarIndex = InvalidArgIndex) + : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)), + VariadicIndex(VarIndex), VarType(Var) {} /// Get the propagation rule for a given function. static TaintPropagationRule @@ -144,9 +143,12 @@ CheckerContext &C); inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); } - inline void addDstArg(unsigned A) { DstArgs.push_back(A); } + inline void addDstArg(unsigned A) { DstArgs.push_back(A); } - inline bool isNull() const { return SrcArgs.empty(); } + inline bool isNull() const { + return SrcArgs.empty() && DstArgs.empty() && + VariadicType::None == VarType; + } inline bool isDestinationArgument(unsigned ArgNum) const { return (std::find(DstArgs.begin(), @@ -169,7 +171,6 @@ /// Pre-process a function which propagates taint according to the /// taint rule. ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const; - }; }; @@ -206,26 +207,27 @@ // value as tainted even if it's just a pointer, pointing to tainted data. // Check for exact name match for functions without builtin substitutes. 
- TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name) - .Case("atoi", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atol", TaintPropagationRule(0, ReturnValueIndex)) - .Case("atoll", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getc", TaintPropagationRule(0, ReturnValueIndex)) - .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex)) - .Case("getw", TaintPropagationRule(0, ReturnValueIndex)) - .Case("toupper", TaintPropagationRule(0, ReturnValueIndex)) - .Case("tolower", TaintPropagationRule(0, ReturnValueIndex)) - .Case("strchr", TaintPropagationRule(0, ReturnValueIndex)) - .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex)) - .Case("read", TaintPropagationRule(0, 2, 1, true)) - .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true)) - .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true)) - .Case("fgets", TaintPropagationRule(2, 0, true)) - .Case("getline", TaintPropagationRule(2, 0)) - .Case("getdelim", TaintPropagationRule(3, 0)) - .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex)) - .Default(TaintPropagationRule()); + TaintPropagationRule Rule = + llvm::StringSwitch<TaintPropagationRule>(Name) + .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex})) + .Case("read", 
TaintPropagationRule({0, 2}, {1, ReturnValueIndex})) + .Case("pread", TaintPropagationRule({0, 2, 3}, {1, ReturnValueIndex})) + .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex})) + .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex})) + .Case("getline", TaintPropagationRule({2}, {0})) + .Case("getdelim", TaintPropagationRule({3}, {0})) + .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex})) + .Default(TaintPropagationRule()); if (!Rule.isNull()) return Rule; @@ -233,18 +235,18 @@ // Check if it's one of the memory setting/copying functions. // This check is specialized but faster then calling isCLibraryFunction. unsigned BId = 0; - if ( (BId = FDecl->getMemoryFunctionKind()) ) - switch(BId) { + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy: case Builtin::BIstrncat: - return TaintPropagationRule(1, 2, 0, true); + return TaintPropagationRule({1, 2}, {0, ReturnValueIndex}); case Builtin::BIstrlcpy: case Builtin::BIstrlcat: - return TaintPropagationRule(1, 2, 0, false); + return TaintPropagationRule({1, 2}, {0}); case Builtin::BIstrndup: - return TaintPropagationRule(0, 1, ReturnValueIndex); + return TaintPropagationRule({0, 1}, {ReturnValueIndex}); default: break; @@ -252,20 +254,23 @@ // Process all other functions which could be defined as builtins. 
if (Rule.isNull()) { - if (C.isCLibraryFunction(FDecl, "snprintf") || - C.isCLibraryFunction(FDecl, "sprintf")) - return TaintPropagationRule(InvalidArgIndex, 0, true); + if (C.isCLibraryFunction(FDecl, "snprintf")) + return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src, + 3); + else if (C.isCLibraryFunction(FDecl, "sprintf")) + return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src, + 2); else if (C.isCLibraryFunction(FDecl, "strcpy") || C.isCLibraryFunction(FDecl, "stpcpy") || C.isCLibraryFunction(FDecl, "strcat")) - return TaintPropagationRule(1, 0, true); + return TaintPropagationRule({1}, {0, ReturnValueIndex}); else if (C.isCLibraryFunction(FDecl, "bcopy")) - return TaintPropagationRule(0, 2, 1, false); + return TaintPropagationRule({0, 2}, {1}); else if (C.isCLibraryFunction(FDecl, "strdup") || C.isCLibraryFunction(FDecl, "strdupa")) - return TaintPropagationRule(0, ReturnValueIndex); + return TaintPropagationRule({0}, {ReturnValueIndex}); else if (C.isCLibraryFunction(FDecl, "wcsdup")) - return TaintPropagationRule(0, ReturnValueIndex); + return TaintPropagationRule({0}, {ReturnValueIndex}); } // Skipping the following functions, since they might be used for cleansing @@ -324,7 +329,6 @@ if (!State) return; C.addTransition(State); - } bool GenericTaintChecker::propagateFromPre(const CallExpr *CE, @@ -338,9 +342,8 @@ if (TaintArgs.isEmpty()) return false; - for (llvm::ImmutableSet<unsigned>::iterator - I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { - unsigned ArgNum = *I; + for (auto I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) { + unsigned ArgNum = *I; // Special handling for the tainted return value. if (ArgNum == ReturnValueIndex) { @@ -352,7 +355,7 @@ // tainted after the call. 
if (CE->getNumArgs() < (ArgNum + 1)) return false; - const Expr* Arg = CE->getArg(ArgNum); + const Expr *Arg = CE->getArg(ArgNum); Optional<SVal> V = getPointedToSVal(C, Arg); if (V) State = State->addTaint(*V); @@ -404,7 +407,8 @@ C.addTransition(State); } -bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{ +bool GenericTaintChecker::checkPre(const CallExpr *CE, + CheckerContext &C) const { if (checkUncontrolledFormatString(CE, C)) return true; @@ -458,53 +462,31 @@ // Check for taint in arguments. bool IsTainted = false; - for (ArgVector::const_iterator I = SrcArgs.begin(), - E = SrcArgs.end(); I != E; ++I) { + for (ArgVector::const_iterator I = SrcArgs.begin(), E = SrcArgs.end(); I != E; + ++I) { unsigned ArgNum = *I; - - if (ArgNum == InvalidArgIndex) { - // Check if any of the arguments is tainted, but skip the - // destination arguments. - for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { - if (isDestinationArgument(i)) - continue; - if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) - break; - } - break; - } - if (CE->getNumArgs() < (ArgNum + 1)) return State; if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C))) break; } + + // Check for taint in variadic arguments. + if (!IsTainted && VariadicType::Src == VarType) { + // Check if any of the arguments is tainted + for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C))) + break; + } + } + if (!IsTainted) return State; // Mark the arguments which should be tainted after the function returns. - for (ArgVector::const_iterator I = DstArgs.begin(), - E = DstArgs.end(); I != E; ++I) { + for (ArgVector::const_iterator I = DstArgs.begin(), E = DstArgs.end(); I != E; + ++I) { unsigned ArgNum = *I; - - // Should we mark all arguments as tainted? 
- if (ArgNum == InvalidArgIndex) { - // For all pointer and references that were passed in: - // If they are not pointing to const data, mark data as tainted. - // TODO: So far we are just going one level down; ideally we'd need to - // recurse here. - for (unsigned int i = 0; i < CE->getNumArgs(); ++i) { - const Expr *Arg = CE->getArg(i); - // Process pointer argument. - const Type *ArgTy = Arg->getType().getTypePtr(); - QualType PType = ArgTy->getPointeeType(); - if ((!PType.isNull() && !PType.isConstQualified()) - || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) - State = State->add<TaintArgsOnPostVisit>(i); - } - continue; - } - // Should mark the return value? if (ArgNum == ReturnValueIndex) { State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex); @@ -516,10 +498,26 @@ State = State->add<TaintArgsOnPostVisit>(ArgNum); } + // Mark all variadic arguments tainted if present. + if (VariadicType::Dst == VarType) { + // For all pointer and references that were passed in: + // If they are not pointing to const data, mark data as tainted. + // TODO: So far we are just going one level down; ideally we'd need to + // recurse here. + for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) { + const Expr *Arg = CE->getArg(i); + // Process pointer argument. + const Type *ArgTy = Arg->getType().getTypePtr(); + QualType PType = ArgTy->getPointeeType(); + if ((!PType.isNull() && !PType.isConstQualified()) || + (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) + State = State->add<TaintArgsOnPostVisit>(i); + } + } + return State; } - // If argument 0 (file descriptor) is tainted, all arguments except for arg 0 // and arg 1 should get taint. ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE, @@ -539,7 +537,6 @@ return nullptr; } - // If argument 0(protocol domain) is network, the return value should get taint. 
ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE, CheckerContext &C) const { @@ -558,7 +555,7 @@ } ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE, - CheckerContext &C) const { + CheckerContext &C) const { ProgramStateRef State = C.getState(); if (CE->getNumArgs() < 2) return State; @@ -605,11 +602,11 @@ if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) { D = D->getCanonicalDecl(); if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) - if (const PointerType * PtrTy = + if (const PointerType *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr())) - if (PtrTy->getPointeeType().getCanonicalType() == - C.getASTContext().getFILEType().getCanonicalType()) - return true; + if (PtrTy->getPointeeType().getCanonicalType() == + C.getASTContext().getFILEType().getCanonicalType()) + return true; } return false; } @@ -667,8 +664,8 @@ return false; } -bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE, - CheckerContext &C) const{ +bool GenericTaintChecker::checkUncontrolledFormatString( + const CallExpr *CE, CheckerContext &C) const { // Check if the function contains a format string argument. unsigned int ArgNum = 0; if (!getPrintfFormatArgumentNum(CE, C, ArgNum)) @@ -679,8 +676,7 @@ MsgUncontrolledFormatString, C); } -bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, - StringRef Name, +bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name, CheckerContext &C) const { // TODO: It might make sense to run this check on demand. In some cases, // we should check if the environment has been cleansed here. We also might @@ -712,8 +708,8 @@ // If the function has a buffer size argument, set ArgNum. unsigned ArgNum = InvalidArgIndex; unsigned BId = 0; - if ( (BId = FDecl->getMemoryFunctionKind()) ) - switch(BId) { + if ((BId = FDecl->getMemoryFunctionKind())) + switch (BId) { case Builtin::BImemcpy: case Builtin::BImemmove: case Builtin::BIstrncpy:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits