================
@@ -393,6 +401,173 @@ ProgramStateRef 
CStringChecker::checkNonNull(CheckerContext &C,
   return stateNonNull;
 }
 
+static std::optional<NonLoc> getIndex(ProgramStateRef State,
+                                      const ElementRegion *ER, CharKind CK) {
+  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SValBuilder.getContext();
+
+  if (CK == CharKind::Regular) {
+    if (ER->getValueType() != Ctx.CharTy)
+      return {};
+    return ER->getIndex();
+  }
+
+  if (ER->getValueType() != Ctx.WideCharTy)
+    return {};
+
+  QualType SizeTy = Ctx.getSizeType();
+  NonLoc WideSize =
+      SValBuilder
+          .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+                      SizeTy)
+          .castAs<NonLoc>();
+  SVal Offset =
+      SValBuilder.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+  if (Offset.isUnknown())
+    return {};
+  return Offset.castAs<NonLoc>();
+}
+
+// Try to get hold of the origin regin (e.g. the actual array region from an
+// element region).
+static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
+  const MemRegion *MR = ER->getSuperRegion();
+  const MemRegion *Ret = MR;
+  assert(MR);
+  if (const auto *sym = MR->getAs<SymbolicRegion>()) {
+    SymbolRef sym2 = sym->getSymbol();
+    if (!sym2)
+      return nullptr;
+    Ret = sym2->getOriginRegion();
+  }
+  if (const auto *element = MR->getAs<ElementRegion>()) {
+    Ret = element->getBaseRegion();
+  }
+  return dyn_cast_or_null<TypedValueRegion>(Ret);
+}
+
+// Basically 1 -> 1st, 12 -> 12th, etc.
+static void printIdxWithOrdinalSuffix(llvm::raw_ostream &Os, unsigned Idx) {
+  Os << Idx << llvm::getOrdinalSuffix(Idx);
+}
+
+ProgramStateRef CStringChecker::checkInit(CheckerContext &C,
+                                          ProgramStateRef State,
+                                          AnyArgExpr Buffer, SVal Element,
+                                          SVal Size) const {
+
+  // If a previous check has failed, propagate the failure.
+  if (!State)
+    return nullptr;
+
+  const MemRegion *R = Element.getAsRegion();
+  if (!R)
+    return State;
+
+  const auto *ER = dyn_cast<ElementRegion>(R);
+  if (!ER)
+    return State;
+
+  const TypedValueRegion *Orig = getOriginRegion(ER);
+  if (!Orig)
+    return State;
+
+  SValBuilder &SValBuilder = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SValBuilder.getContext();
+
+  // FIXME: We ought to able to check objects as well. Maybe
+  // UninitializedObjectChecker could help?
+  if (!Orig->getValueType()->isArrayType())
+    return State;
+
+  const QualType ElemTy = Ctx.getBaseElementType(Orig->getValueType());
+  const NonLoc Zero = SValBuilder.makeZeroArrayIndex();
+
+  SVal FirstElementVal =
+      State->getLValue(ElemTy, Zero, loc::MemRegionVal(Orig)).castAs<Loc>();
+  if (!isa<Loc>(FirstElementVal))
+    return State;
+
+  // Ensure that we wouldn't read uninitialized value.
+  if (Filter.CheckCStringUninitializedRead &&
+      State->getSVal(FirstElementVal.castAs<Loc>()).isUndef()) {
+    llvm::SmallString<258> Buf;
+    llvm::raw_svector_ostream OS(Buf);
+    OS << "The first element of the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument is undefined";
+    emitUninitializedReadBug(C, State, Buffer.Expression, OS.str());
+    return nullptr;
+  }
+
+  // We won't check whether the entire region is fully initialized -- lets just
+  // check that the first and the last element is. So, onto checking the last
+  // element:
+  const QualType IdxTy = SValBuilder.getArrayIndexType();
+
+  NonLoc ElemSize =
+      SValBuilder
+          .makeIntVal(Ctx.getTypeSizeInChars(ElemTy).getQuantity(), IdxTy)
+          .castAs<NonLoc>();
+
+  // FIXME: Check that the size arg to the cstring function is divisible by
+  // size of the actual element type?
+
+  // The type of the argument to the cstring function is either char or wchar,
+  // but thats not the type of the original array (or memory region).
+  // Suppose the following:
+  //   int t[5];
+  //   memcpy(dst, t, sizeof(t) / sizeof(t[0]));
+  // When checking whether t is fully initialized, we see it as char array of
+  // size sizeof(int)*5. If we check the last element as a character, we read
+  // the last byte of an integer, which will be undefined. But just because
+  // that value is undefined, it doesn't mean that the element is 
uninitialized!
+  // For this reason, we need to retrieve the actual last element with the
+  // correct type.
+
+  // Divide the size argument to the cstring function by the actual element
+  // type. This value will be size of the array, or the index to the
+  // past-the-end element.
+  std::optional<NonLoc> Offset =
+      SValBuilder
+          .evalBinOpNN(State, clang::BO_Div, Size.castAs<NonLoc>(), ElemSize,
+                       IdxTy)
+          .getAs<NonLoc>();
+
+  // Retrieve the index of the last element.
+  const NonLoc One = SValBuilder.makeIntVal(1, IdxTy).castAs<NonLoc>();
+  SVal LastIdx = SValBuilder.evalBinOpNN(State, BO_Sub, *Offset, One, IdxTy);
+
+  if (!Offset)
+    return State;
+
+  SVal LastElementVal =
+      State->getLValue(ElemTy, LastIdx, loc::MemRegionVal(Orig));
+  if (!isa<Loc>(LastElementVal))
+    return State;
+
+  if (Filter.CheckCStringUninitializedRead &&
+      State->getSVal(LastElementVal.castAs<Loc>()).isUndef()) {
+    const llvm::APSInt *IdxInt = LastIdx.getAsInteger();
+    // If we can't get emit a sensible last element index, just bail out --
+    // prefer to emit nothing in favour of emitting garbage quality reports.
+    if (!IdxInt) {
+      C.addSink();
+      return nullptr;
+    }
+    llvm::SmallString<258> Buf;
+    llvm::raw_svector_ostream OS(Buf);
+    OS << "The last element of the ";
+    printIdxWithOrdinalSuffix(OS, Buffer.ArgumentIndex + 1);
+    OS << " argument to access (the ";
+    printIdxWithOrdinalSuffix(OS, IdxInt->getExtValue() + 1);
+    OS << ") is undefined";
----------------
NagyDonat wrote:

What would you think about messages like _"In the 4th argument the last 
accessed element (which is at index 31) is undefined"_?

This differs from your suggestion in three ways:
- You're right that we need to emphasize that we're only accessing a part of 
the array, but I feel that "last accessed element" would be a better phrasing 
than the dangling "to access" (which was confusing for me before your 
explanation).
- I think it's better to express the index as a zero-based index instead of an 
one-based ordinal number, because one-based indexing could be unexpected and 
confusing in C/C++.
- I reordered the parts of the message to ensure that "last accessed element" 
and the "(which is at index ...)" note can be next to each other.

https://github.com/llvm/llvm-project/pull/95408
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to