================
@@ -393,6 +401,162 @@ ProgramStateRef 
CStringChecker::checkNonNull(CheckerContext &C,
   return stateNonNull;
 }
 
+static std::optional<NonLoc> getIndex(ProgramStateRef State,
+                                      const ElementRegion *ER, CharKind CK) {
+  SValBuilder &SVB = State->getStateManager().getSValBuilder();
+  ASTContext &Ctx = SVB.getContext();
+
+  if (CK == CharKind::Regular) {
+    if (ER->getValueType() != Ctx.CharTy)
+      return {};
+    return ER->getIndex();
+  }
+
+  if (ER->getValueType() != Ctx.WideCharTy)
+    return {};
+
+  QualType SizeTy = Ctx.getSizeType();
+  NonLoc WideSize =
+      SVB.makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
+                     SizeTy)
+          .castAs<NonLoc>();
+  SVal Offset =
+      SVB.evalBinOpNN(State, BO_Mul, ER->getIndex(), WideSize, SizeTy);
+  if (Offset.isUnknown())
+    return {};
+  return Offset.castAs<NonLoc>();
+}
+
+// Try to get hold of the origin region (e.g. the actual array region from an
+// element region).
+static const TypedValueRegion *getOriginRegion(const ElementRegion *ER) {
+  const MemRegion *MR = ER->getSuperRegion();
+  const MemRegion *Ret = MR;
+  assert(MR);
+  if (const auto *sym = MR->getAs<SymbolicRegion>()) {
+    SymbolRef sym2 = sym->getSymbol();
+    if (!sym2)
+      return nullptr;
+    Ret = sym2->getOriginRegion();
+  }
+  return dyn_cast_or_null<TypedValueRegion>(Ret);
+}
----------------
NagyDonat wrote:

I thought several hours about this function, and my TLDR conclusion is that (1) 
this logic is wrong (2) `SymExpr::getOriginRegion()` is a red flag and (3) just 
use a plain `getSuperRegion()` call instead of this function. (Note that the 
branch where SymExpr::getOriginRegion() is used is not exercised in any 
testcase.)

More precisely, the problem is that `SymExpr::getOriginRegion()` is an 
extremely unnatural operation, which extracts a part of the "opaque unique tag" 
of the symbol. The result of `S->getOriginRegion()` is:
- the region _R_ when `S` is a `SymbolRegionValue` which represents "the value 
stored in the memory region _R_ at the start of the analysis of this function"
- the region _R_ when `S` is a `SymbolDerived` which represents "the value 
stored in the memory region _R_ after a certain invalidation"
- nullpointer in other cases (e.g. `SymbolConjured`).

As a concrete example, if you have a function like
```c++
void save_to_logs(int *src, int begin, int count) {
  memcpy(global_log_array, src + begin, count * sizeof(int));
}
```
then the element region representing `src + begin` would be passed to 
`getOriginRegion()`, which would first strip the `ElementRegion` layer to get a 
`SymbolicRegion`, then unwrap that to get a `SymbolRegionValue` and finally 
return the `ParamVarRegion` which represents the pointer-sized memory area of 
the variable `src`.

This won't lead to a crash because this `ParamVarRegion` is immediately 
discarded by the next check, which requires an array type:
```
if (!Orig->getValueType()->isArrayType())
    return State;
```
However, there are no situations where `getOriginRegion()` = "region where a 
pointer to this symbolic region was stored at some earlier step" is actually 
useful, so you should just remove the branch using it.

By the way, I see that you need a `TypedValueRegion` after this point to 
continue the calculations -- but if the SuperRegion of your ElementRegion is a 
SymbolicRegion, then the right thing to do is just discarding it. A 
`SymbolicRegion` is not necessarily a real "region", it may just represent a 
pointer that points into the middle of some opaque unknown memory.

https://github.com/llvm/llvm-project/pull/95408
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to