ASDenysPetrov updated this revision to Diff 388552. ASDenysPetrov added a comment.
Rebased. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D103096/new/ https://reviews.llvm.org/D103096 Files: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp clang/lib/StaticAnalyzer/Core/SymbolManager.cpp clang/test/Analysis/symbol-integral-cast.cpp
Index: clang/test/Analysis/symbol-integral-cast.cpp =================================================================== --- /dev/null +++ clang/test/Analysis/symbol-integral-cast.cpp @@ -0,0 +1,374 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -analyzer-config eagerly-assume=false -analyzer-config support-symbolic-integer-casts=true -verify %s + +template <typename T> +void clang_analyzer_eval(T); +void clang_analyzer_warnIfReached(); + +typedef short int16_t; +typedef int int32_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +void test1(int x) { + // Even if two lower bytes of `x` equal to zero, it doesn't mean that + // the entire `x` is zero. We are not able to know the exact value of x. + // It can be one of 65536 possible values like [0, 65536, 131072, ...] + // and so on. To avoid huge range sets we still assume `x` in the range + // [INT_MIN, INT_MAX]. + if (!(short)x) { + if (!x) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test2(int x) { + // If two lower bytes of `x` equal to zero, and we know x to be 65537, + // which is not truncated to short as zero. Thus the branch is infisible. + short s = x; + if (!s) { + if (x == 65537) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test3(int x, short s) { + s = x; + if ((short)x > -10 && s < 10) { + if (x > 0 && x < 10) { + // If the range of the whole variable was constrained then reason again + // about truncated bytes to make the ranges more precise. + clang_analyzer_eval((short)x <= 0); // expected-warning {{FALSE}} + } + } +} + +void test4(unsigned x) { + if ((char)x > 8) { + // Constraint the range of the lowest byte of `x` to [9, CHAR_MAX]. + // The original range of `x` still remains [0, UINT_MAX]. 
+ clang_analyzer_eval((char)x < 42); // expected-warning {{UNKNOWN}} + if (x < 42) { + // Constraint the original range to [0, 42] and update (re-constraint) + // the range of the lowest byte of 'x' to [9, 42]. + clang_analyzer_eval((char)x < 42); // expected-warning {{TRUE}} + } + } +} + +void test5(unsigned x) { + if ((char)x > -10 && (char)x < 10) { + if ((short)x == 8) { + // If the range of higher bytes(short) was constrained then reason again + // about smaller truncated ranges(char) to make it more precise. + clang_analyzer_eval((char)x == 8); // expected-warning {{TRUE}} + clang_analyzer_eval((short)x == 8); // expected-warning {{TRUE}} + // We still assume full version of `x` in the range [INT_MIN, INT_MAX]. + clang_analyzer_eval(x == 8); // expected-warning {{UNKNOWN}} + } + } +} + +void test6(int x) { + // Even if two lower bytes of `x` less than zero, it doesn't mean that `x` + // can't be greater than zero. Thence we don't change the native range of + // `x` and this branch is feasible. + if (x > 0) + if ((short)x < 0) + clang_analyzer_eval(x > 0); // expected-warning {{TRUE}} +} + +void test7(int x) { + // The range of two lower bytes of `x` [1, SHORT_MAX] is enough to cover + // all possible values of char [CHAR_MIN, CHAR_MAX]. So the lowest byte + // can be lower than zero. + if ((short)x > 0) { + if ((char)x < 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test8(int x) { + // Promotion from `signed int` to `signed long long` also reasoning about the + // original range, because we know the fact that even after promotion it + // remains in the range [INT_MIN, INT_MAX]. + if ((long long)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{TRUE}} +} + +void test9(signed int x) { + // Any cast `signed` to `unsigned` produces an unsigned range, which is + // [0, UNSIGNED_MAX] and can not be lower than zero. 
+ if ((unsigned long long)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((unsigned int)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((unsigned short)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((unsigned char)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + +void test10(unsigned int x, signed char sc) { + // Promotion from `unsigned` to `signed` produces a signed range, + // which is able to cover all the values of the original, + // so that such cast is not lower than zero. + if ((signed long long)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + // Any other cast(conversion or truncation) from `unsigned` to `signed` + // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX] + // and can be lower than zero. + if ((signed int)x < 0) // explicit cast + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + signed short ss = x; // initialization + if (ss < 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + sc = x; // assignment + if (sc < 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + +void test11(unsigned int x) { + // Promotion from 'unsigned' to 'signed' entirely covers the original range. + // Thence such cast is not lower than zero and the `true` branch is + // infiseable. 
But it doesn't affect the original range, which still remains + // as [0, UNSIGNED_MAX]. + if ((signed long long)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} + + // Any other cast(conversion or truncation) from `unsigned` to `signed` + // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]. But it doesn't + // affect the original range, which still remains as [0, UNSIGNED_MAX]. + if ((signed int)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} + + if ((signed short)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} + + if ((signed char)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} +} + +void test12(int x, char c) { + if (x >= 5308) { + if (x <= 5419) { + // Truncation on assignment: int[5308, 5419] -> char[-68, 43] + c = x; + clang_analyzer_eval(-68 <= c && c <= 43); // expected-warning {{TRUE}} + + if (c < 50) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + // Truncation on initializaion: int[5308, 5419] -> char[-68, 43] + char c1 = x; + clang_analyzer_eval(-68 <= c1 && c1 <= 43); // expected-warning {{TRUE}} + } + } +} + +void test13(int x) { + if (x > 913440767 && x < 913440769) { // 0x36720000 + + if ((short)x) // Truncation: int[913440768] -> short[0] + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((short)x != 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test14(int x) { + if (x >= -1569193983 && x <= 578290016) { + // The big range of `x` covers all possible values of short. 
+ // Truncation: int[-1569193983, 578290016] -> short[-32768, 32767] + if ((short)x > 0) { + clang_analyzer_eval(-1569193983 <= x && x <= 578290016); // expected-warning {{TRUE}} + short s = x; + clang_analyzer_eval(-32768 <= s && s <= 32767); // expected-warning {{TRUE}} + } + } +} + +void test15(int x) { + if (x >= -1569193983 && x <= -1569193871) { // [0xA2780001, 0xA2780071] + // The small range of `x` covers only several values of short. + // Truncation: int[-1569193983, -1569193871] -> short[1, 113] + if ((short)x) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + if ((short)x > 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + if ((short)x < 114) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + } +} + +void test16(char x) { + if (x < 0) + clang_analyzer_eval(-128 <= x && x < 0); // expected-warning {{TRUE}} + else + clang_analyzer_eval(0 <= x && x <= 127); // expected-warning {{TRUE}} +} + +void test17(char x) { + if (-11 <= x && x <= -10) { + unsigned u = x; + // Conversion: char[-11, -10] -> unsigned int[4294967285, 4294967286] + clang_analyzer_eval(4294967285 <= u && u <= 4294967286); // expected-warning {{TRUE}} + unsigned short us = x; + // Conversion: char[-11, -10] -> unsigned short[65525, 65526] + clang_analyzer_eval(65525 <= us && us <= 65526); // expected-warning {{TRUE}} + unsigned char uc = x; + // Conversion: char[-11, -10] -> unsigned char[245, 246] + clang_analyzer_eval(245 <= uc && uc <= 246); // expected-warning {{TRUE}} + } +} + +void test18(char c, short s, int i) { + // Any char value always is less then 1000. 
+ int OneThousand = 1000; + c = i; + if (c < OneThousand) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + // Any short value always is greater than -40000. + int MinusFourtyThousands = -40000; + s = i; + if (s > MinusFourtyThousands) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning +} + +void test19(char x, short y) { + if (-43 <= x && x <= -42) { // x[-43, -42] + y = 42; + clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}} + + clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + } +} + +void test20(char x, short y) { + if (42 <= y && y <= 43) { // y[42, 43] + x = -42; + clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < 
int32_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}} + + clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + } +} + +void test21(unsigned x) { + if (x > 42) { + // Unsigned range can generate two signed ranges. + // Conversion: unsigned[43, 4294967295] -> int[-2147483648, -1]U[43, 2147483647] + int i = x; // initialization + clang_analyzer_eval(-1 < i && i < 43); // expected-warning {{FALSE}} + } +} + +void test22(int x, unsigned u) { + if (x > -42) { + // Signed range can generate two unsigned ranges. 
+ // Conversion: int[-41, 2147483647] -> unsigned[0, 2147483647]U[4294967255, 4294967295] + u = x; // assignment + clang_analyzer_eval(2147483647 < u && u < 4294967255); // expected-warning {{FALSE}} + } +} + +// PR51036 +void test23(signed char c) { + if ((unsigned int)c <= 200) { + // Conversion: char[0, 127] -> unsigned int[0, 127] + clang_analyzer_eval(0 <= c && c <= 127); // expected-warning {{TRUE}} + } +} + +void test24(int x, int y) { + if (x == y) { + short s = x; + if (!s) { + if (y == 65537) + // FIXME: This should not warn. Support EquivalenceClasses. + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } + } +} Index: clang/lib/StaticAnalyzer/Core/SymbolManager.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/SymbolManager.cpp +++ clang/lib/StaticAnalyzer/Core/SymbolManager.cpp @@ -543,3 +543,10 @@ return VarContext->isParentOf(CurrentContext); } + +SymbolRef SymExpr::ignoreCasts() const { + SymbolRef Sym = this; + while (isa<SymbolCast>(Sym)) + Sym = cast<SymbolCast>(Sym)->getOperand(); + return Sym; +} Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -532,8 +532,11 @@ // We only handle LHS as simple symbols or SymIntExprs. SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol(); + // Unwrap SymbolCast trying to find SymIntExpr inside. + SymbolRef S = Sym->ignoreCasts(); + // LHS is a symbolic expression. - if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) { + if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(S)) { // Is this a logical not? (!x is represented as x == 0.) 
if (op == BO_EQ && rhs.isZeroConstant()) { Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/JsonSupport.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h" @@ -20,8 +21,8 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -1048,6 +1049,13 @@ return State->set<ConstraintRange>(Class, Constraint); } +LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State, + SymbolRef Sym, + RangeSet Constraint) { + return State->set<ConstraintRange>(EquivalenceClass::find(State, Sym), + Constraint); +} + LLVM_NODISCARD ProgramStateRef setConstraints(ProgramStateRef State, ConstraintRangeTy Constraints) { return State->set<ConstraintRange>(Constraints); @@ -1179,6 +1187,60 @@ // Symbolic reasoning logic //===----------------------------------------------------------------------===// +/// This class is used for integral symbolic casts feature as a helper instance. +/// +/// It represents a list of integral types of different sizes going in ascending +/// order from 1 to 8 bytes. It aggregates several functions for convenience of +/// usage. We can iterate through the types and find a type by size (bit width). 
+/// +/// We use FOUR integer types: `int8`, `int16`, `int32`, `int64`, because we +/// only support casts between types, which are lower or equal to 64-bit width. +/// +/// We use these types for creating SymbolCast to find constraints in the +/// constraint map. This allows to canonize a `key-value` to store and retrieve +/// constraints instead of brute force. +/// +/// We don't care about the type signedness. Signedness is just a way of bits +/// representation. We just care about saving data. It's enough for us to store +/// specific constraints for the type for a specific bit width. We never use +/// retrieved constraint directly. We always use RangeSet::Factory::castTo to +/// get ranges for a needed type (signed or unsigned) after retrieving. +class NominalTypeList { + CanQualType Types[4]; + +public: + using Iterator = CanQualType *; + + void init(ASTContext &C) { + Types[0] = C.Char8Ty; + Types[1] = C.Char16Ty; + Types[2] = C.Char32Ty; + Types[3] = C.LongLongTy; + } + Iterator findByWidth(uint32_t Width) { + int index = 4; + switch (Width) { + case 8: + index = 0; + break; + case 16: + index = 1; + break; + case 32: + index = 2; + break; + case 64: + index = 3; + }; + return Types + index; + } + Iterator begin() { return std::begin(Types); } + Iterator end() { return std::end(Types); } +}; + +// We should initialize NTL with `init` method before use. +static NominalTypeList NTL; + /// A little component aggregating all of the reasoning we have about /// the ranges of symbolic expressions. /// @@ -1194,6 +1256,69 @@ return Inferrer.infer(Origin); } + RangeSet VisitSymbolCast(const SymbolCast *Sym) { + AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions(); + if (!Opts.ShouldSupportSymbolicIntegerCasts) + return VisitSymExpr(Sym); + + // Unwrap symbol to get an underlying(root) symbol. + // Store every next type except the inner(original) one. 
+ SmallVector<QualType, 2> Types; + uint32_t MinBitWidth = UINT32_MAX; + SymbolRef RootSym = Sym; + ASTContext &C = ValueFactory.getContext(); + do { + // We only handle integral cast, when all the types are integrals. + // Otherwise, pass the given symbol to VisitSymExpr. + QualType T = RootSym->getType(); + if (!T->isIntegralOrEnumerationType()) + return VisitSymExpr(Sym); + + MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T)); + Types.push_back(T); + RootSym = cast<SymbolCast>(RootSym)->getOperand(); + } while (isa<SymbolCast>(RootSym)); + + QualType RootTy = RootSym->getType(); + const uint32_t RootBitWidth = C.getIntWidth(RootTy); + + // Check if we have any known truncated ranges of the root symbol. + // Truncated ranges usually are more precise than the original one. + // The more truncated is the range the more precise it should be. + // Example: Consider the given SymbolCast is (int8)(int64)(int16){int32 x}. + // `int8` - is the smallest type. Then the range will fit in it. + // Traverse through NTL types, that are smaller than the root type: + // [int8, int32). + const RangeSet *RSPtr = nullptr; + auto It = NTL.findByWidth(MinBitWidth); + auto E = NTL.findByWidth(RootBitWidth); + for (; !RSPtr && It < E; ++It) { + // Produce canonical symbols with the nominal type. + SymbolRef S = + State->getSymbolManager().getCastSymbol(RootSym, RootTy, *It); + // Find the first constraint and exit the loop. + RSPtr = getConstraint(State, S); + } + // If we didn't find any truncated ranges, look for the constraint for + // the root type. + // Example (cont.): Use the root symbol `{int32 x}`. + if (!RSPtr) + RSPtr = getConstraint(State, RootSym); + // If there's no existing range, create it based on the root type. + // Example (cont.): Make range based on `int32`. + RangeSet RS = RSPtr ? *RSPtr : infer(RootTy); + + // Cast the range to the cast types from inner to outer one by one. + // Example (cont.): Go through 3 types from `int16` to `int8`. 
+ auto TypesReversedRange = llvm::make_range(Types.rbegin(), Types.rend()); + for (const QualType T : TypesReversedRange) + RS = RangeFactory.castTo(RS, T); + + // Finally we got a range of Sym->getType() type. + // Example (cont.): Type of range is `int8`. + return RS; + } + RangeSet VisitSymExpr(SymbolRef Sym) { // If we got to this function, the actual type of the symbolic // expression is not supported for advanced inference. @@ -1751,7 +1876,9 @@ class RangeConstraintManager : public RangedConstraintManager { public: RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) - : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} + : RangedConstraintManager(EE, SVB), F(getBasicVals()) { + NTL.init(SVB.getContext()); + } //===------------------------------------------------------------------===// // Implementation for interface from ConstraintManager. @@ -1862,7 +1989,7 @@ /// Derived class can control which types we handle by defining methods of the /// following form: /// -/// bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym, +/// bool assign${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym, /// CONSTRAINT Constraint); /// /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.) 
@@ -1980,12 +2107,15 @@ } inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint); + inline bool assignSymExprToRangeSet(const SymExpr *Sym, RangeSet Constraint); inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym, RangeSet Constraint) { return handleRemainderOp(Sym, Constraint); } inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym, RangeSet Constraint); + inline bool assignSymbolCastToRangeSet(const SymbolCast *Sym, + RangeSet Constraint); private: ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder, @@ -2058,8 +2188,8 @@ LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) { assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here"); - if (Constraint.getConcreteValue()) - return !Constraint.getConcreteValue()->isZero(); + if (const llvm::APSInt *Int = Constraint.getConcreteValue()) + return !Int->isZero(); if (!Constraint.containsZero()) return true; @@ -2067,11 +2197,192 @@ return llvm::None; } + void updateExistingConstraints(SymbolRef Sym, RangeSet R); + SymbolRef getProperSymbol(SymbolRef Sym); + ProgramStateRef State; SValBuilder &Builder; RangeSet::Factory &RangeFactory; }; +//===----------------------------------------------------------------------===// +// ConstraintAssignor implementation details +//===----------------------------------------------------------------------===// + +bool ConstraintAssignor::assignSymExprToRangeSet(const SymExpr *Sym, + RangeSet Constraint) { + AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions(); + if (Opts.ShouldSupportSymbolicIntegerCasts || + !Sym->getType()->isIntegralOrEnumerationType()) { + updateExistingConstraints(Sym, Constraint); + if (!State) + return false; + } + + // Next assignments is based on the fact that Constraint is a concrete value. + // Make sure of this. 
+ if (!Constraint.getConcreteValue()) + return true; + + llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses; + // Iterate over all equivalence classes and try to simplify them. + ClassMembersTy Members = State->get<ClassMembers>(); + for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) { + EquivalenceClass Class = ClassToSymbolSet.first; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + SimplifiedClasses.insert(Class); + } + + // Trivial equivalence classes (those that have only one symbol member) are + // not stored in the State. Thus, we must skim through the constraints as + // well. And we try to simplify symbols in the constraints. + ConstraintRangeTy Constraints = State->get<ConstraintRange>(); + for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) { + EquivalenceClass Class = ClassConstraint.first; + if (SimplifiedClasses.count(Class)) // Already simplified. + continue; + State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class); + if (!State) + return false; + } + + return true; +} + +bool ConstraintAssignor::assignSymbolCastToRangeSet(const SymbolCast *Sym, + RangeSet R) { + AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions(); + // If symbol is not integral or the option is off, we need another handler. + if (!Opts.ShouldSupportSymbolicIntegerCasts || + !Sym->getType()->isIntegralOrEnumerationType()) + return false; + + // If range is empty, the branch is infeasible. + if (R.isEmpty()) { + State = nullptr; + return false; + } + + SymbolRef S = getProperSymbol(Sym); + // If symbol is not integral, we need another handler. + if (!S) + return true; + + R = RangeFactory.castTo(R, S->getType()); + updateExistingConstraints(S, R); + State = setConstraint(State, S, R); + + return false; +} + +/// Return a symbol which is the best canidate to save it in the constraint +/// map. 
We should correct symbol because in case of truncation cast we can +/// only reason about truncated bytes but not the whole value. E.g. (char)(int +/// x), we can store constraints for the first lower byte but we still don't +/// know the root value. Also in case of promotion or conversion we should +/// store the root value instead of cast symbol, because we can always get +/// a correct range using `castTo` method. And we are not interested in any +/// constraints of cast symbol but the root symbol in `if` expression +/// or any bifurcation. We can return: +/// - a new symbol based on the root, in case of a truncation, +/// - a root symbol if it is not a truncation. +/// +/// \param Sym -- a given symbol. +/// \returns a corrected symbol based on a given one. Symbol is null if the +/// given symbol is unsupported. We support only integral casts. +SymbolRef ConstraintAssignor::getProperSymbol(SymbolRef Sym) { + // We don't need to do any extra work for non-SymbolCast's. + if (!isa<SymbolCast>(Sym)) + return Sym; + + // Extract a root symbol and compare it to outer types. + ASTContext &C = Builder.getContext(); + SymbolRef RootSym = Sym; + // Get the root symbol. + uint32_t MinBitWidth = UINT32_MAX; + do { + // We only handle integral cast, when all the types are integrals. + // Return `nullptr` in this particular case to notify user that we can not + // handle non-integral SymbolCast. + QualType T = RootSym->getType(); + if (!T->isIntegralOrEnumerationType()) + return nullptr; + MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T)); + RootSym = cast<SymbolCast>(RootSym)->getOperand(); + } while (isa<SymbolCast>(RootSym)); + + // Check for truncation. + QualType RootTy = RootSym->getType(); + uint32_t RootBitWidth = C.getIntWidth(RootTy); + + const bool IsTruncated = (MinBitWidth < RootBitWidth); + if (IsTruncated) { + // Truncation occurred. High bits lost. 
We can't reason about ranges of + // the original(root) operand in this case, so we should not add it to the + // constraint map. Canonize Sym instead. + // We produce a new symbol using an NTL type equal to the smallest type of + // Sym. For instance: + // - (int)(uchar)x -> (char8)x + // - (long)(ushort)(short)x -> (char16)x + + // Produce a new SymbolCast. + CanQualType Ty = *NTL.findByWidth(MinBitWidth); + RootSym = State->getSymbolManager().getCastSymbol(RootSym, RootTy, Ty); + } + + return RootSym; +} + +/// Update existing constraints for all truncated SymbolCasts based on the +/// given symbol which types are less than the current one. +/// For instance, for Sym: +/// - {int8 x} update nothing; +/// - {int16 x} update (int8)x; +/// - {int32 x} update (int8)x, (int16)x; +/// - {int64 x} update (int8)x, (int16)x, (int32)x. +/// +/// FIXME: Update bigger casts. We only can reason about ranges of smaller +/// types, because it would be too complicated to update, say, the entire +/// `int` range if you only have knowledge that its lowest byte has been +/// changed. So we don't touch bigger casts and they may be potentially +/// invalid. For future, for: +/// - {int8 x} update (int16)x, (int32)x, (int64)x; +/// - {int16 x} update (int32)x, (int64)x; +/// - {int32 x} update (int64)x; +/// - {int64 x} update nothing. +/// +/// \param State -- current program state. +/// \param Sym -- a considered symbol. +/// \param R -- a known range for the given symbol. +/// \note: needs check of null state after use. +void ConstraintAssignor::updateExistingConstraints(SymbolRef Sym, RangeSet R) { + unsigned SymBitWidth = Builder.getContext().getIntWidth(Sym->getType()); + // Get a root symbol in case of SymbolCast. 
+ Sym = Sym->ignoreCasts(); + QualType SymTy = Sym->getType(); + auto SmallerNTLTypes = + llvm::make_range(NTL.begin(), NTL.findByWidth(SymBitWidth)); + SymbolManager &SM = State->getSymbolManager(); + for (const QualType T : SmallerNTLTypes) { + // Use NTL type to create canonical SymbolCast to find an existing + // constraint. + SymbolRef S = SM.getCastSymbol(Sym, SymTy, T); + // If such constraint is found, update it by intersecting. + if (const RangeSet *RS = getConstraint(State, S)) { + RangeSet TruncR = RangeFactory.castTo(R, T); + TruncR = RangeFactory.intersect(*RS, TruncR); + // If intersection is empty, then the branch is infeasible. + if (TruncR.isEmpty()) { + State = nullptr; + break; + } + // Update the constraint. + State = setConstraint(State, S, TruncR); + } + } +} bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym, const llvm::APSInt &Constraint) { Index: clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -418,6 +418,12 @@ ProgramStateRef State = C.getState(); + // Unwrap symbolic expression to skip argument casts on function call. + // This is useful when there is no way for overloading function in C + // but we need to pass different types of arguments and + // implicit cast occurs. 
+ Sym = Sym->ignoreCasts(); + C.addTransition(C.getState()->set<DenotedSymbols>(Sym, E)); } Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -62,6 +62,8 @@ virtual QualType getType() const = 0; virtual void Profile(llvm::FoldingSetNodeID &profile) = 0; + const SymExpr *ignoreCasts() const; + /// Iterator over symbols that the current symbol depends on. /// /// For SymbolData, it's the symbol itself; for expressions, it's the Index: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h =================================================================== --- clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h +++ clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h @@ -135,8 +135,9 @@ " (" + Visit(S->getRHS()) + ")"; } - // TODO: SymbolCast doesn't appear in practice. - // Add the relevant code once it does. + std::string VisitSymbolCast(const SymbolCast *S) { + return "(" + S->getType().getAsString() + ")" + Visit(S->getOperand()); + } std::string VisitSymbolicRegion(const SymbolicRegion *R) { // Explain 'this' object here.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits