ASDenysPetrov updated this revision to Diff 358669. ASDenysPetrov added a comment.
Rebased CHANGES SINCE LAST ACTION https://reviews.llvm.org/D103096/new/ https://reviews.llvm.org/D103096 Files: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp clang/test/Analysis/symbol-integral-cast.cpp
Index: clang/test/Analysis/symbol-integral-cast.cpp =================================================================== --- /dev/null +++ clang/test/Analysis/symbol-integral-cast.cpp @@ -0,0 +1,353 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=debug.ExprInspection -analyzer-config eagerly-assume=false -analyzer-config support-symbolic-integer-casts=true -verify %s + +template <typename T> +void clang_analyzer_eval(T); +void clang_analyzer_warnIfReached(); + +typedef short int16_t; +typedef int int32_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; + +void test1(int x) { + // Even if two lower bytes of `x` equal to zero, it doesn't mean that + // the entire `x` is zero. We are not able to know the exact value of x. + // It can be one of 65536 possible values like [0, 65536, 131072, ...] + // and so on. To avoid huge range sets we still assume `x` in the range + // [INT_MIN, INT_MAX]. + if (!(short)x) { + if (!x) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test2(int x) { + // If two lower bytes of `x` equal to zero, and we know x to be 65537, + // which is not truncated to short as zero. Thus the branch is infisible. + short s = x; + if (!s) { + if (x == 65537) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test3(int x, short s) { + s = x; + if ((short)x > -10 && s < 10) { + if (x > 0 && x < 10) { + // If the range of the whole variable was constrained then reason again + // about truncated bytes to make the ranges more precise. + clang_analyzer_eval((short)x <= 0); // expected-warning {{FALSE}} + } + } +} + +void test4(unsigned x) { + if ((char)x > 8) { + // Constraint the range of the lowest byte of `x` to [9, CHAR_MAX]. + // The original range of `x` still remains [0, UINT_MAX]. 
+ clang_analyzer_eval((char)x < 42); // expected-warning {{UNKNOWN}} + if (x < 42) { + // Constraint the original range to [0, 42] and update (re-constraint) + // the range of the lowest byte of 'x' to [9, 42]. + clang_analyzer_eval((char)x < 42); // expected-warning {{TRUE}} + } + } +} + +void test5(unsigned x) { + if ((char)x > -10 && (char)x < 10) { + if ((short)x == 8) { + // If the range of higher bytes(short) was constrained then reason again + // about smaller truncated ranges(char) to make it more precise. + clang_analyzer_eval((char)x == 8); // expected-warning {{TRUE}} + clang_analyzer_eval((short)x == 8); // expected-warning {{TRUE}} + // We still assume full version of `x` in the range [INT_MIN, INT_MAX]. + clang_analyzer_eval(x == 8); // expected-warning {{UNKNOWN}} + } + } +} + +void test6(int x) { + // Even if two lower bytes of `x` less than zero, it doesn't mean that `x` + // can't be greater than zero. Thence we don't change the native range of + // `x` and this branch is feasible. + if (x > 0) + if ((short)x < 0) + clang_analyzer_eval(x > 0); // expected-warning {{TRUE}} +} + +void test7(int x) { + // The range of two lower bytes of `x` [1, SHORT_MAX] is enough to cover + // all possible values of char [CHAR_MIN, CHAR_MAX]. So the lowest byte + // can be lower than zero. + if ((short)x > 0) { + if ((char)x < 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test8(int x) { + // Promotion from `signed int` to `signed long long` also reasoning about the + // original range, because we know the fact that even after promotion it + // remains in the range [INT_MIN, INT_MAX]. + if ((long long)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{TRUE}} +} + +void test9(signed int x) { + // Any cast `signed` to `unsigned` produces an unsigned range, which is + // [0, UNSIGNED_MAX] and can not be lower than zero. 
+ if ((unsigned long long)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((unsigned int)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((unsigned short)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((unsigned char)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + +void test10(unsigned int x, signed char sc) { + // Promotion from `unsigned` to `signed` produces a signed range, + // which is able to cover all the values of the original, + // so that such cast is not lower than zero. + if ((signed long long)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + // Any other cast(conversion or truncation) from `unsigned` to `signed` + // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX] + // and can be lower than zero. + if ((signed int)x < 0) // explicit cast + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + signed short ss = x; // initialization + if (ss < 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + sc = x; // assignment + if (sc < 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} +} + +void test11(unsigned int x) { + // Promotion from 'unsigned' to 'signed' entirely covers the original range. + // Thence such cast is not lower than zero and the `true` branch is + // infiseable. 
But it doesn't affect the original range, which still remains + // as [0, UNSIGNED_MAX]. + if ((signed long long)x < 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} + + // Any other cast(conversion or truncation) from `unsigned` to `signed` + // produces a signed range, which is [SIGNED_MIN, SIGNED_MAX]. But it doesn't + // affect the original range, which still remains as [0, UNSIGNED_MAX]. + if ((signed int)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} + + if ((signed short)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} + + if ((signed char)x < 0) + clang_analyzer_eval(x < 0); // expected-warning {{FALSE}} +} + +void test12(int x, char c) { + if (x >= 5308) { + if (x <= 5419) { + // Truncation on assignment: int[5308, 5419] -> char[-68, 43] + c = x; + clang_analyzer_eval(-68 <= c && c <= 43); // expected-warning {{TRUE}} + + if (c < 50) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + // Truncation on initializaion: int[5308, 5419] -> char[-68, 43] + char c1 = x; + clang_analyzer_eval(-68 <= c1 && c1 <= 43); // expected-warning {{TRUE}} + } + } +} + +void test13(int x) { + if (x > 913440767 && x < 913440769) { // 0x36720000 + + if ((short)x) // Truncation: int[913440768] -> short[0] + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + + if ((short)x != 0) + clang_analyzer_warnIfReached(); // no-warning + else + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + } +} + +void test14(int x) { + if (x >= -1569193983 && x <= 578290016) { + // The big range of `x` covers all possible values of short. 
+ // Truncation: int[-1569193983, 578290016] -> short[-32768, 32767] + if ((short)x > 0) { + clang_analyzer_eval(-1569193983 <= x && x <= 578290016); // expected-warning {{TRUE}} + short s = x; + clang_analyzer_eval(-32768 <= s && s <= 32767); // expected-warning {{TRUE}} + } + } +} + +void test15(int x) { + if (x >= -1569193983 && x <= -1569193871) { // [0xA2780001, 0xA2780071] + // The small range of `x` covers only several values of short. + // Truncation: int[-1569193983, -1569193871] -> short[1, 113] + if ((short)x) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + if ((short)x > 0) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + if ((short)x < 114) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + } +} + +void test16(char x) { + if (x < 0) + clang_analyzer_eval(-128 <= x && x < 0); // expected-warning {{TRUE}} + else + clang_analyzer_eval(0 <= x && x <= 127); // expected-warning {{TRUE}} +} + +void test17(char x) { + if (-11 <= x && x <= -10) { + unsigned u = x; + // Conversion: char[-11, -10] -> unsigned int[4294967285, 4294967286] + clang_analyzer_eval(4294967285 <= u && u <= 4294967286); // expected-warning {{TRUE}} + unsigned short us = x; + // Conversion: char[-11, -10] -> unsigned short[65525, 65526] + clang_analyzer_eval(65525 <= us && us <= 65526); // expected-warning {{TRUE}} + unsigned char uc = x; + // Conversion: char[-11, -10] -> unsigned char[245, 246] + clang_analyzer_eval(245 <= uc && uc <= 246); // expected-warning {{TRUE}} + } +} + +void test18(char c, short s, int i) { + // Any char value always is less then 1000. 
+ int OneThousand = 1000; + c = i; + if (c < OneThousand) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning + + // Any short value is always greater than -40000. + int MinusFourtyThousands = -40000; + s = i; + if (s > MinusFourtyThousands) + clang_analyzer_warnIfReached(); // expected-warning {{REACHABLE}} + else + clang_analyzer_warnIfReached(); // no-warning +} + +void test19(char x, short y) { + if (-43 <= x && x <= -42) { // x[-43, -42] + y = 42; + clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < int32_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}} + + clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + } +} + +void test20(char x, short y) { + if (42 <= y && y <= 43) { // y[42, 43] + x = -42; + clang_analyzer_eval(int16_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < 
int32_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < int32_t(y)); // expected-warning {{TRUE}} + + clang_analyzer_eval(int16_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(int32_t(x) < uint16_t(y)); // expected-warning {{TRUE}} + clang_analyzer_eval(int32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < int32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < int32_t(y)); // expected-warning {{FALSE}} + + clang_analyzer_eval(uint16_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint16_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint16_t(y)); // expected-warning {{FALSE}} + clang_analyzer_eval(uint32_t(x) < uint32_t(y)); // expected-warning {{FALSE}} + } +} + +void test21(unsigned x) { + if (x > 42) { + // Unsigned range can generate two signed ranges. + // Conversion: unsigned[43, 4294967295] -> int[-2147483648, -1]U[43, 2147483647] + int i = x; // initialization + clang_analyzer_eval(-1 < i && i < 43); // expected-warning {{FALSE}} + } +} + +void test22(int x, unsigned u) { + if (x > -42) { + // Signed range can generate two unsigned ranges. 
+ // Conversion: int[-41, 2147483647] -> unsigned[0, 2147483647]U[4294967255, 4294967295] + u = x; // assignment + clang_analyzer_eval(2147483647 < u && u < 4294967255); // expected-warning {{FALSE}} + } +} Index: clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp +++ clang/lib/StaticAnalyzer/Core/SimpleSValBuilder.cpp @@ -536,8 +536,11 @@ // We only handle LHS as simple symbols or SymIntExprs. SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol(); + // Unwrap SymbolCast trying to find SymIntExpr inside. + SymbolRef S = Sym->ignoreCasts(); + // LHS is a symbolic expression. - if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) { + if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(S)) { // Is this a logical not? (!x is represented as x == 0.) if (op == BO_EQ && rhs.isZeroConstant()) { Index: clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp =================================================================== --- clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -13,6 +13,7 @@ #include "clang/Basic/JsonSupport.h" #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" +#include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h" @@ -20,8 +21,8 @@ #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/ImmutableSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -1107,6 +1108,60 @@ // Symbolic reasoning logic 
//===----------------------------------------------------------------------===// +/// This class is used for integral symbolic casts feature as a helper instance. +/// +/// It represents a list of integral types of different sizes going in ascending +/// order from 1 to 8 bytes. It aggregates several functions for convenience of +/// usage. We can iterate through the types and find a type by size (bit width). +/// +/// We use FOUR integer types: `int8`, `int16`, `int32`, `int64`, because we +/// only support casts between types, which are lower or equal to 64-bit width. +/// +/// We use these types for creating SymbolCast to find constraints in the +/// constraint map. This allows to canonize a `key-value` to store and retrieve +/// constraints instead of brute force. +/// +/// We don't care about the type signedness. Signedness is just a way of bits +/// representation. We just care about saving data. It's enough for us to store +/// specific constraints for the type for a specific bit width. We never use +/// retrieved constraint directly. We always use RangeSet::Factory::castTo to +/// get ranges for a needed type (signed or unsigned) after retrieving. +class NominalTypeList { + CanQualType Types[4]; + +public: + using Iterator = CanQualType *; + + void init(ASTContext &C) { + Types[0] = C.Char8Ty; + Types[1] = C.Char16Ty; + Types[2] = C.Char32Ty; + Types[3] = C.LongLongTy; + } + Iterator findByWidth(uint32_t Width) { + int index = 4; + switch (Width) { + case 8: + index = 0; + break; + case 16: + index = 1; + break; + case 32: + index = 2; + break; + case 64: + index = 3; + }; + return Types + index; + } + Iterator begin() { return std::begin(Types); } + Iterator end() { return std::end(Types); } +}; + +// We should initialize NTL with `init` method before use. +static NominalTypeList NTL; + /// A little component aggregating all of the reasoning we have about /// the ranges of symbolic expressions. 
/// @@ -1122,6 +1177,69 @@ return Inferrer.infer(Origin); } + RangeSet VisitSymbolCast(const SymbolCast *Sym) { + AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions(); + if (!Opts.ShouldSupportSymbolicIntegerCasts) + return VisitSymExpr(Sym); + + // Unwrap symbol to get an underlying(root) symbol. + // Store every next type except the inner(original) one. + SmallVector<QualType, 2> Types; + uint32_t MinBitWidth = UINT32_MAX; + SymbolRef RootSym = Sym; + ASTContext &C = ValueFactory.getContext(); + do { + // We only handle integral cast, when all the types are integrals. + // Otherwise, pass the given symbol to VisitSymExpr. + QualType T = RootSym->getType(); + if (!T->isIntegralOrEnumerationType()) + return VisitSymExpr(Sym); + + MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T)); + Types.push_back(T); + RootSym = cast<SymbolCast>(RootSym)->getOperand(); + } while (isa<SymbolCast>(RootSym)); + + QualType RootTy = RootSym->getType(); + const uint32_t RootBitWidth = C.getIntWidth(RootTy); + + // Check if we have any known truncated ranges of the root symbol. + // Truncated ranges usually are more precise than the original one. + // The more truncated is the range the more precise it should be. + // Example: Consider the given SymbolCast is (int8)(int64)(int16){int32 x}. + // `int8` - is the smallest type. Then the range will fit in it. + // Traverse through NTL types, that are smaller than the root type: + // [int8, int32). + const RangeSet *RSPtr = nullptr; + auto It = NTL.findByWidth(MinBitWidth); + auto E = NTL.findByWidth(RootBitWidth); + for (; !RSPtr && It < E; ++It) { + // Produce canonical symbols with the nominal type. + SymbolRef S = + State->getSymbolManager().getCastSymbol(RootSym, RootTy, *It); + // Find the first constraint and exit the loop. + RSPtr = getConstraint(State, S); + } + // If we didn't find any truncated ranges, look for the constraint for + // the root type. + // Example (cont.): Use the root symbol `{int32 x}`. 
+ if (!RSPtr) + RSPtr = getConstraint(State, RootSym); + // If there's no existing range, create it based on the root type. + // Example (cont.): Make range based on `int32`. + RangeSet RS = RSPtr ? *RSPtr : infer(RootTy); + + // Cast the range to the cast types from inner to outer one by one. + // Example (cont.): Go through 3 types from `int16` to `int8`. + auto TypesReversedRange = llvm::make_range(Types.rbegin(), Types.rend()); + for (const QualType T : TypesReversedRange) + RS = RangeFactory.castTo(RS, T); + + // Finally we got a range of Sym->getType() type. + // Example (cont.): Type of range is `int8`. + return RS; + } + RangeSet VisitSymExpr(SymbolRef Sym) { // If we got to this function, the actual type of the symbolic // expression is not supported for advanced inference. @@ -1873,7 +1991,9 @@ class RangeConstraintManager : public RangedConstraintManager { public: RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) - : RangedConstraintManager(EE, SVB), F(getBasicVals()) {} + : RangedConstraintManager(EE, SVB), F(getBasicVals()) { + NTL.init(SVB.getContext()); + } //===------------------------------------------------------------------===// // Implementation for interface from ConstraintManager. @@ -1949,6 +2069,13 @@ private: RangeSet::Factory F; + std::tuple<ProgramStateRef, SymbolRef, RangeSet> + modifySymbolAndConstraints(ProgramStateRef State, SymbolRef Sym, RangeSet R); + ProgramStateRef updateExistingConstraints(ProgramStateRef State, + SymbolRef Sym, RangeSet R); + Optional<std::pair<SymbolRef, RangeSet>> + getProperSymbolAndConstraint(SymbolRef Sym, RangeSet R); + RangeSet getRange(ProgramStateRef State, SymbolRef Sym); RangeSet getRange(ProgramStateRef State, EquivalenceClass Class); ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym, @@ -2706,6 +2833,159 @@ // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1, // UINT_MAX, 0, 1, and 2. 
+/// Prepare a proper symbol and ranges to save them into the constraint map. +/// Update existing constraints related to the given symbol if it is an integral +/// one. +/// +/// \param State -- current program state. +/// \param Sym -- a considered symbol. +/// \param R -- a known range for the given symbol. +/// \returns the triple set which matches to the parameters. Each argument +/// is corrected (if needed) and returned back. State is null in case of an +/// infeasible branch. +/// +/// \note: this function is a helper and is only invoked in +/// RangeConstraintManager::assume### methods. +std::tuple<ProgramStateRef, SymbolRef, RangeSet> +RangeConstraintManager::modifySymbolAndConstraints(ProgramStateRef State, + SymbolRef Sym, RangeSet R) { + AnalyzerOptions &Opts = State->getAnalysisManager().getAnalyzerOptions(); + if (!Opts.ShouldSupportSymbolicIntegerCasts || + !Sym->getType()->isIntegralOrEnumerationType() || R.isEmpty()) + return {State, Sym, R}; + + auto OptSymRange = getProperSymbolAndConstraint(Sym, R); + // If symbol is not integral, return the triple without handling. + if (!OptSymRange) + return {State, Sym, R}; + + Sym = OptSymRange->first; + R = OptSymRange->second; + + State = updateExistingConstraints(State, Sym, R); + return {State, Sym, R}; +} + +/// Return a pair of a right symbol and ranges to save them in the constraint +/// map. We should correct symbol because in case of truncation cast we can only +/// reason about truncated bytes but not the whole value. E.g. (char)(int x), +/// we can store constraints for the first lower byte but we still don't know +/// the original value. Also in case of promotion or conversion we should store +/// the original value with original ranges, instead of cast symbol, because we +/// are not interested in any constraints of cast symbol but the original symbol +/// in `if` expression or any bifurcation. 
+/// Technically, we only handle SymbolCast here, since other types are OK +/// and don't need additional handling. +/// We can return: +/// - a new symbol based on the root, in case of a truncation, +/// - a root symbol if it is not a truncation. +/// We also cast and return ranges to the type of a result symbol. +/// +/// \param Sym -- a considered symbol. +/// \param R -- a known range for the given symbol. +Optional<std::pair<SymbolRef, RangeSet>> +RangeConstraintManager::getProperSymbolAndConstraint(SymbolRef Sym, + RangeSet R) { + // We don't need to do any extra work for non-SymbolCast's. + if (!isa<SymbolCast>(Sym)) + return std::make_pair(Sym, R); + + // Extract a root symbol and compare it to outer types. + ASTContext &C = getBasicVals().getContext(); + SymbolRef RootSym = Sym; + // Get the root symbol. + uint32_t MinBitWidth = UINT32_MAX; + do { + // We only handle integral cast, when all the types are integrals. + // Return `None` in this particular case to notify user that we can not + // handle non-integral SymbolCast. + QualType T = RootSym->getType(); + if (!T->isIntegralOrEnumerationType()) + return None; + MinBitWidth = std::min(MinBitWidth, C.getIntWidth(T)); + RootSym = cast<SymbolCast>(RootSym)->getOperand(); + } while (isa<SymbolCast>(RootSym)); + + // Check for truncation. + QualType RootTy = RootSym->getType(); + uint32_t RootBitWidth = C.getIntWidth(RootTy); + const bool IsTruncated = (MinBitWidth < RootBitWidth); + + if (IsTruncated) { + // Truncation occurred. High bits lost. We can't reason about ranges of + // the original(root) operand in this case, so we should not add it to the + // constraint map. Canonize Sym instead. + // We produce a new symbol using an NTL type equal to the smallest type of + // Sym. For instance: + // - (int)(uchar)x -> (char8)x + // - (long)(ushort)(short)x -> (char16)x + + // Make a truncated range. + CanQualType Ty = *NTL.findByWidth(MinBitWidth); + R = F.castTo(R, Ty); + // Produce a new SymbolCast. 
+ RootSym = getSymbolManager().getCastSymbol(RootSym, RootTy, Ty); + } else { + // Promotion or conversion occurred. No bit lost. Make a range for the root + // type. Cast the given range to the type of the root range. + R = F.castTo(R, RootTy); + } + + return std::make_pair(RootSym, R); +} + +/// Update existing constraints for all truncated SymbolCasts based on the +/// given symbol which types are less than the current one. +/// For instance, for Sym: +/// - {int8 x} update nothing; +/// - {int16 x} update (int8)x; +/// - {int32 x} update (int8)x, (int16)x; +/// - {int64 x} update (int8)x, (int16)x, (int32)x. +/// +/// FIXME: Update bigger casts. We only can reason about ranges of smaller +/// types, because it would be too complicated to update, say, the entire `int` +/// range if you only have knowledge that its lowest byte has been changed. So +/// we don't touch bigger casts and they may be potentially invalid. +/// For future, for: +/// - {int8 x} update (int16)x, (int32)x, (int64)x; +/// - {int16 x} update (int32)x, (int64)x; +/// - {int32 x} update (int64)x; +/// - {int64 x} update nothing. +/// +/// \param State -- current program state. +/// \param Sym -- a considered symbol. +/// \param R -- a known range for the given symbol. +/// \returns the state with updated constraints. State is null if the branch is +/// infeasible. +ProgramStateRef +RangeConstraintManager::updateExistingConstraints(ProgramStateRef State, + SymbolRef Sym, RangeSet R) { + unsigned SymBitWidth = + getBasicVals().getContext().getIntWidth(Sym->getType()); + // Get a root symbol in case of SymbolCast. + Sym = Sym->ignoreCasts(); + auto SmallerNTLTypes = + llvm::make_range(NTL.begin(), NTL.findByWidth(SymBitWidth)); + for (const QualType T : SmallerNTLTypes) { + // Use NTL type to create canonical SymbolCast to find an existing + // constraint. + SymbolRef S = + State->getSymbolManager().getCastSymbol(Sym, Sym->getType(), T); + // If such constraint is found, update it by intersecting. 
+ if (const RangeSet *RS = getConstraint(State, S)) { + RangeSet TruncR = F.castTo(R, T); + TruncR = F.intersect(*RS, TruncR); + // If intersection is empty, then the branch is infisible. + if (TruncR.isEmpty()) + return nullptr; + // Update the constraint. + State = setRange(State, S, TruncR); + } + } + + return State; +} + ProgramStateRef RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym, const llvm::APSInt &Int, @@ -2719,6 +2999,9 @@ RangeSet New = getRange(St, Sym); New = F.deletePoint(New, Point); + std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New); + if (!St) + return nullptr; return setRange(St, Sym, New); } @@ -2736,6 +3019,9 @@ RangeSet New = getRange(St, Sym); New = F.intersect(New, AdjInt); + std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New); + if (!St) + return nullptr; return setRange(St, Sym, New); } @@ -2773,6 +3059,9 @@ const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymLTRange(St, Sym, Int, Adjustment); + std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New); + if (!St) + return nullptr; return setRange(St, Sym, New); } @@ -2810,6 +3099,9 @@ const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymGTRange(St, Sym, Int, Adjustment); + std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New); + if (!St) + return nullptr; return setRange(St, Sym, New); } @@ -2847,6 +3139,10 @@ const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymGERange(St, Sym, Int, Adjustment); + + std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New); + if (!St) + return nullptr; return setRange(St, Sym, New); } @@ -2891,6 +3187,10 @@ const llvm::APSInt &Int, const llvm::APSInt &Adjustment) { RangeSet New = getSymLERange(St, Sym, Int, Adjustment); + + std::tie(St, Sym, New) = modifySymbolAndConstraints(St, Sym, New); + if (!St) + return nullptr; return setRange(St, Sym, New); } Index: 
clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp =================================================================== --- clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp +++ clang/lib/StaticAnalyzer/Checkers/ExprInspectionChecker.cpp @@ -418,6 +418,12 @@ ProgramStateRef State = C.getState(); + // Unwrap symbolic expression to skip argument casts on function call. + // This is useful when there is no way for overloading function in C + // but we need to pass different types of arguments and + // implicit cast occurs. + Sym = Sym->ignoreCasts(); + C.addTransition(C.getState()->set<DenotedSymbols>(Sym, E)); } Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h +++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymbolManager.h @@ -262,7 +262,7 @@ /// Represents a cast expression. class SymbolCast : public SymExpr { - const SymExpr *Operand; + SymbolRef Operand; /// Type of the operand. 
QualType FromTy; @@ -271,7 +271,7 @@ QualType ToTy; public: - SymbolCast(const SymExpr *In, QualType From, QualType To) + SymbolCast(SymbolRef In, QualType From, QualType To) : SymExpr(SymbolCastKind), Operand(In), FromTy(From), ToTy(To) { assert(In); assert(isValidTypeForSymbol(From)); @@ -287,12 +287,19 @@ QualType getType() const override { return ToTy; } - const SymExpr *getOperand() const { return Operand; } + SymbolRef getOperand() const { return Operand; } + + SymbolRef ignoreCasts() const override { + SymbolRef Sym = Operand; + while (isa<SymbolCast>(Sym)) + Sym = cast<SymbolCast>(Sym)->Operand; + return Sym; + } void dumpToStream(raw_ostream &os) const override; - static void Profile(llvm::FoldingSetNodeID& ID, - const SymExpr *In, QualType From, QualType To) { + static void Profile(llvm::FoldingSetNodeID &ID, SymbolRef In, QualType From, + QualType To) { ID.AddInteger((unsigned) SymbolCastKind); ID.AddPointer(In); ID.Add(From); @@ -304,9 +311,7 @@ } // Implement isa<T> support. - static bool classof(const SymExpr *SE) { - return SE->getKind() == SymbolCastKind; - } + static bool classof(SymbolRef SE) { return SE->getKind() == SymbolCastKind; } }; /// Represents a symbolic expression involving a binary operator Index: clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h =================================================================== --- clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h +++ clang/include/clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h @@ -62,6 +62,8 @@ virtual QualType getType() const = 0; virtual void Profile(llvm::FoldingSetNodeID &profile) = 0; + virtual const SymExpr *ignoreCasts() const { return this; } + /// Iterator over symbols that the current symbol depends on. 
/// /// For SymbolData, it's the symbol itself; for expressions, it's the Index: clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h =================================================================== --- clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h +++ clang/include/clang/StaticAnalyzer/Checkers/SValExplainer.h @@ -135,8 +135,9 @@ " (" + Visit(S->getRHS()) + ")"; } - // TODO: SymbolCast doesn't appear in practice. - // Add the relevant code once it does. + std::string VisitSymbolCast(const SymbolCast *S) { + return "(" + S->getType().getAsString() + ")" + Visit(S->getOperand()); + } std::string VisitSymbolicRegion(const SymbolicRegion *R) { // Explain 'this' object here.
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits