================ @@ -393,6 +401,173 @@ ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, return stateNonNull; } +static std::optional<NonLoc> getIndex(ProgramStateRef State, + const ElementRegion *ER, CharKind CK) { + SValBuilder &SValBuilder = State->getStateManager().getSValBuilder(); + ASTContext &Ctx = SValBuilder.getContext(); + + if (CK == CharKind::Regular) { + if (ER->getValueType() != Ctx.CharTy) + return {}; + return ER->getIndex(); + } + + if (ER->getValueType() != Ctx.WideCharTy) + return {}; + + QualType SizeTy = Ctx.getSizeType(); + NonLoc WideSize = + SValBuilder + .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), + SizeTy) + .castAs<NonLoc>(); + SVal Offset = + SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy); + if (Offset.isUnknown()) + return {}; + return Offset.castAs<NonLoc>(); +} + +// Try to get hold of the origin regin (e.g. the actual array region from an +// element region). +static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) { + const MemRegion *MR = ER->getSuperRegion(); + const MemRegion *Ret = MR; + assert(MR); + if (const auto *sym = MR->getAs<SymbolicRegion>()) { + SymbolRef sym2 = sym->getSymbol(); + if (!sym2) + return nullptr; + Ret = sym2->getOriginRegion(); + } + if (const auto *element = MR->getAs<ElementRegion>()) { + Ret = element->getBaseRegion(); + } + return dyn_cast_or_null<TypedValueRegion>(Ret); +} + +// Basically 1 -> 1st, 12 -> 12th, etc. +static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) { + Os << Idx << llvm::getOrdinalSuffix(Idx); +} + +ProgramStateRef CStringChecker::checkInit(CheckerContext &C, + ProgramStateRef State, + AnyArgExpr Buffer, SVal Element, + SVal Size) const { + + // If a previous check has failed, propagate the failure. + if (!State) + return nullptr; + + const MemRegion *R = Element.getAsRegion(); + if (!R) + return State; + + const auto *ER = dyn_cast<ElementRegion>(R); + if (!ER) + return State; + + const TypedValueRegion *Orig = getOriginRegion(ER); + if (!Orig) + return State; + + SValBuilder &SValBuilder = State->getStateManager().getSValBuilder(); + ASTContext &Ctx = SValBuilder.getContext(); + + // FIXME: We ought to able to check objects as well. Maybe + // UninitializedObjectChecker could help? + if (!Orig->getValueType()->isArrayType()) + return State; + + const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType()); + const NonLoc Zero = SValBuilder.makeZeroArrayIndex(); + + SVal FirstElementVal = + State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>(); + if (!isa<Loc>(FirstElementVal)) + return State; + + // Ensure that we wouldn't read uninitialized value. + if (Filter.CheckCStringUninitializedRead && + State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) { + llvm::SmallString<258> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "The first element of the "; + printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); + OS << " argument is undefined"; + emitUninitializedReadBug(C, State, Buffer.Expression, OS.str()); + return nullptr; + } + + // We won't check whether the entire region is fully initialized -- lets just + // check that the first and the last element is. So, onto checking the last + // element: + const QualType IdxTy = SValBuilder.getArrayIndexType(); + + NonLoc ElemSize = + SValBuilder + .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy) + .castAs<NonLoc>(); + + // FIXME: Check that the size arg to the cstring function is divisible by + // size of the actual element type? + + // The type of the argument to the cstring function is either char or wchar, + // but thats not the type of the original array (or memory region). + // Suppose the following: + // int t[5]; + // memcpy(dst, t, sizeof(t) / sizeof(t[0])); + // When checking whether t is fully initialized, we see it as char array of + // size sizeof(int)*5. If we check the last element as a character, we read + // the last byte of an integer, which will be undefined. But just because + // that value is undefined, it doesn't mean that the element is uninitialized! + // For this reason, we need to retrieve the actual last element with the + // correct type. + + // Divide the size argument to the cstring function by the actual element + // type. This value will be size of the array, or the index to the + // past-the-end element. + std::optional<NonLoc> Offset = + SValBuilder + .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize, + IdxTy) + .getAs<NonLoc>(); + + // Retrieve the index of the last element. + const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>(); + SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy); + + if (!Offset) + return State; + + SVal LastElementVal = + State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig)); + if (!isa<Loc>(LastElementVal)) + return State; + + if (Filter.CheckCStringUninitializedRead && + State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) { + const llvm::APSInt *IdxInt = LastIdx.getAsInteger(); + // If we can't get emit a sensible last element index, just bail out -- + // prefer to emit nothing in favour of emitting garbage quality reports. + if (!IdxInt) { + C.addSink(); + return nullptr; + } + llvm::SmallString<258> Buf; + llvm::raw_svector_ostream OS(Buf); + OS << "The last element of the "; + printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1); + OS << " argument to access (the "; + printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1); + OS << ") is undefined"; ---------------- NagyDonat wrote:
What would you think about messages like _"In the 4th argument the last accessed element (which is at index 31) is undefined"_? This differs from your suggestion in three ways: - You're right that we need to emphasize that we're only accessing a part of the array, but I feel that "last accessed element" would be a better phrasing than the dangling "to access" (which was confusing for me before your explanation). - I think it's better to express the index as a zero-based index instead of an one-based ordinal number, because one-based indexing could be unexpected and confusing in C/C++. - I reordered the parts of the message to ensure that "last accessed element" and the "(which is at index ...)" note can be next to each other. https://github.com/llvm/llvm-project/pull/95408 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits