llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-static-analyzer-1

Author: Balazs Benics (steakhal)

<details>
<summary>Changes</summary>

Like in the test case:
```c++
struct String {
  String(const String &amp;) {}
};

struct MatchComponent {
  unsigned numbers[2];
  String prerelease;
  MatchComponent(MatchComponent const &amp;) = default;
};

MatchComponent get();
void consume(MatchComponent const &amp;);

MatchComponent parseMatchComponent() {
  MatchComponent component = get();
  component.numbers[0] = 10;
  component.numbers[1] = 20;
  return component; // We should have no stack addr escape warning here.
}

void top() {
  consume(parseMatchComponent());
}
```

When calling `consume(parseMatchComponent())` the `parseMatchComponent()` would 
return a copy of a temporary of `component`. That copy would invoke the 
`MatchComponent::MatchComponent(const MatchComponent &amp;)` ctor.

That ctor would have a (reference typed) ParamVarRegion, holding the location 
(lvalue) of the object we are about to copy (&amp;component). So far so good, 
but just before evaluating the binding operation for initializing the `numbers` 
field of the temporary, we evaluate the ArrayInitLoopExpr representing the 
by-value elementwise copy of the array `component.numbers`. This is represented 
by a LazyCompoundVal, because we (usually) don't just copy large arrays and 
bind many individual direct bindings. Rather, we take a snapshot by using a LCV.

However, notice that the LCV representing this copy would look like this:
  lazyCompoundVal{ParamVarRegion{"reference param of cctor"}.numbers}

Notice that it refers to the numbers field of a reference. It would be much 
better to desugar the reference to the actual object, thus it should be: 
`lazyCompoundVal{component.numbers}`

Actually, when binding the result of the ArrayInitLoopExpr to the 
`temp_object.numbers` in the compiler-generated member initializer of the 
cctor, we should have two individual direct bindings because this is a "small 
array":
```
  binding &amp;Element{temp_object.numbers, 0} &lt;- 
loadFrom(&amp;Element{component.numbers, 0})
  binding &amp;Element{temp_object.numbers, 1} &lt;- 
loadFrom(&amp;Element{component.numbers, 1})
```
Where `loadFrom(...)` would be:
```
  loadFrom(&amp;Element{component.numbers, 0}): 10 U32b
  loadFrom(&amp;Element{component.numbers, 1}): 20 U32b
```
So the store should look like this, after PostInitializer of 
`temp_object.numbers`:
```
  temp_object at offset  0: 10 U32b
  temp_object at offset 32: 20 U32b
```

The lesson is that it's okay to have TypedValueRegions of references as long as 
we don't form subregions of those. If we ever want to refer to a subregion of a 
"reference" we actually meant to "desugar" the reference and slice a subregion 
of the pointee of the reference instead.

Once this canonicalization is in place, we can also drop the special handling 
of references in `ProcessInitializer`, because now reference TypedValueRegions 
are eagerly desugared into their referee region when forming a subregion of it.

There should be no practical differences, but there are of course bugs that 
this patch may surface.

---
Full diff: https://github.com/llvm/llvm-project/pull/118096.diff


5 Files Affected:

- (modified) 
clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h (+1) 
- (modified) clang/lib/StaticAnalyzer/Core/ExprEngine.cpp (+1-9) 
- (modified) clang/lib/StaticAnalyzer/Core/MemRegion.cpp (+8) 
- (modified) clang/lib/StaticAnalyzer/Core/ProgramState.cpp (+12) 
- (modified) clang/test/Analysis/initializer.cpp (+26) 


``````````diff
diff --git 
a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h 
b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
index eef7a54f03bf11..29f534eba2a265 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h
@@ -487,6 +487,7 @@ class ProgramState : public llvm::FoldingSetNode {
   friend void ProgramStateRetain(const ProgramState *state);
   friend void ProgramStateRelease(const ProgramState *state);
 
+  SVal desugarReference(SVal Val) const;
   SVal wrapSymbolicRegion(SVal Base) const;
 };
 
diff --git a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp 
b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
index 22eab9f66418d4..648c7daf823ed3 100644
--- a/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ExprEngine.cpp
@@ -1206,15 +1206,7 @@ void ExprEngine::ProcessInitializer(const CFGInitializer 
CFGInit,
         while ((ASE = dyn_cast<ArraySubscriptExpr>(Init)))
           Init = ASE->getBase()->IgnoreImplicit();
 
-        SVal LValue = State->getSVal(Init, stackFrame);
-        if (!Field->getType()->isReferenceType()) {
-          if (std::optional<Loc> LValueLoc = LValue.getAs<Loc>()) {
-            InitVal = State->getSVal(*LValueLoc);
-          } else if (auto CV = LValue.getAs<nonloc::CompoundVal>()) {
-            // Initializer list for an array.
-            InitVal = *CV;
-          }
-        }
+        InitVal = State->getSVal(Init, stackFrame);
 
         // If we fail to get the value for some reason, use a symbolic value.
         if (InitVal.isUnknownOrUndef()) {
diff --git a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp 
b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
index 02d1358a2001ef..ad4e43630dd44e 100644
--- a/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
+++ b/clang/lib/StaticAnalyzer/Core/MemRegion.cpp
@@ -65,6 +65,11 @@ using namespace ento;
 // MemRegion Construction.
 
//===----------------------------------------------------------------------===//
 
+[[maybe_unused]] static bool isAReferenceTypedValueRegion(const MemRegion *R) {
+  const auto *TyReg = llvm::dyn_cast<TypedValueRegion>(R);
+  return TyReg && TyReg->getValueType()->isReferenceType();
+}
+
 template <typename RegionTy, typename SuperTy, typename Arg1Ty>
 RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1,
                                          const SuperTy *superRegion) {
@@ -76,6 +81,7 @@ RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1,
   if (!R) {
     R = new (A) RegionTy(arg1, superRegion);
     Regions.InsertNode(R, InsertPos);
+    assert(!isAReferenceTypedValueRegion(superRegion));
   }
 
   return R;
@@ -92,6 +98,7 @@ RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1, 
const Arg2Ty arg2,
   if (!R) {
     R = new (A) RegionTy(arg1, arg2, superRegion);
     Regions.InsertNode(R, InsertPos);
+    assert(!isAReferenceTypedValueRegion(superRegion));
   }
 
   return R;
@@ -110,6 +117,7 @@ RegionTy* MemRegionManager::getSubRegion(const Arg1Ty arg1, 
const Arg2Ty arg2,
   if (!R) {
     R = new (A) RegionTy(arg1, arg2, arg3, superRegion);
     Regions.InsertNode(R, InsertPos);
+    assert(!isAReferenceTypedValueRegion(superRegion));
   }
 
   return R;
diff --git a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp 
b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
index 0be2709f0907d8..d4f56342d934c9 100644
--- a/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
+++ b/clang/lib/StaticAnalyzer/Core/ProgramState.cpp
@@ -205,6 +205,16 @@ ProgramStateRef ProgramState::killBinding(Loc LV) const {
   return makeWithStore(newStore);
 }
 
+/// We should never form a MemRegion that would wrap a TypedValueRegion of a
+/// reference type. What we actually wanted was to create a MemRegion refering
+/// to the pointee of that reference.
+SVal ProgramState::desugarReference(SVal Val) const {
+  const auto *TyReg = dyn_cast_or_null<TypedValueRegion>(Val.getAsRegion());
+  if (!TyReg || !TyReg->getValueType()->isReferenceType())
+    return Val;
+  return getSVal(TyReg);
+}
+
 /// SymbolicRegions are expected to be wrapped by an ElementRegion as a
 /// canonical representation. As a canonical representation, SymbolicRegions
 /// should be wrapped by ElementRegions before getting a FieldRegion.
@@ -445,12 +455,14 @@ void ProgramState::setStore(const StoreRef &newStore) {
 }
 
 SVal ProgramState::getLValue(const FieldDecl *D, SVal Base) const {
+  Base = desugarReference(Base);
   Base = wrapSymbolicRegion(Base);
   return getStateManager().StoreMgr->getLValueField(D, Base);
 }
 
 SVal ProgramState::getLValue(const IndirectFieldDecl *D, SVal Base) const {
   StoreManager &SM = *getStateManager().StoreMgr;
+  Base = desugarReference(Base);
   Base = wrapSymbolicRegion(Base);
 
   // FIXME: This should work with `SM.getLValueField(D->getAnonField(), Base)`,
diff --git a/clang/test/Analysis/initializer.cpp 
b/clang/test/Analysis/initializer.cpp
index 16d7a348fdfb6d..f50afff25d2450 100644
--- a/clang/test/Analysis/initializer.cpp
+++ b/clang/test/Analysis/initializer.cpp
@@ -366,3 +366,29 @@ void testI() {
   clang_analyzer_eval(B::b == 2); // expected-warning{{TRUE}}
 }
 } // namespace dont_skip_vbase_initializers_in_most_derived_class
+
+namespace elementwise_copy_small_array_from_post_initializer_of_cctor {
+struct String {
+  String(const String &) {}
+};
+
+struct MatchComponent {
+  unsigned numbers[2];
+  String prerelease;
+  MatchComponent(MatchComponent const &) = default;
+};
+
+MatchComponent get();
+void consume(MatchComponent const &);
+
+MatchComponent parseMatchComponent() {
+  MatchComponent component = get();
+  component.numbers[0] = 10;
+  component.numbers[1] = 20;
+  return component; // We should have no stack addr escape warning here.
+}
+
+void top() {
+  consume(parseMatchComponent());
+}
+} // namespace elementwise_copy_small_array_from_post_initializer_of_cctor

``````````

</details>


https://github.com/llvm/llvm-project/pull/118096
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to