llvmorg-github-actions[bot] wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang-analysis

Author: Zhijie Wang (aeft)

<details>
<summary>Changes</summary>

Extend origin trees with field-labeled edges so loans through record fields can 
be tracked.

- `OriginNode::Edge` carries a `FieldDecl *` label; the tree fans out across a 
record's tracked fields (lambda fields and public fields).
- `flow()` recurses on field children matched by `FieldDecl`.
- Cycle cut at record nodes to bound self-referential types like `struct Node { 
Node *next; }`.

#<!-- -->184344

---

Patch is 26.88 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/195603.diff


8 Files Affected:

- (modified) 
clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h (+4) 
- (modified) clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h 
(+57-22) 
- (modified) clang/lib/Analysis/LifetimeSafety/Facts.cpp (+1) 
- (modified) clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp (+39-10) 
- (modified) clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp (+21-12) 
- (modified) clang/lib/Analysis/LifetimeSafety/Origins.cpp (+82-8) 
- (modified) clang/test/Sema/warn-lifetime-safety-dangling-field.cpp (+4-6) 
- (modified) clang/test/Sema/warn-lifetime-safety.cpp (+230-8) 


``````````diff
diff --git 
a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h 
b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
index b5185ba815b00..9f53df4735e53 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -125,6 +125,10 @@ class FactsGenerator : public 
ConstStmtVisitor<FactsGenerator> {
 
   void markUseAsWrite(const DeclRefExpr *DRE);
 
+  /// Walks the full subtree so origins on the pointee chain and on field
+  /// children both escape with the returned value.
+  void emitReturnEscapes(OriginNode *N, const Expr *RetExpr);
+
   bool escapesViaReturn(OriginID OID) const;
 
   llvm::SmallVector<Fact *> issuePlaceholderLoans();
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h 
b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
index cacefc8aa62ad..5ff64d1b33784 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
@@ -21,6 +21,7 @@
 #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeStats.h"
 #include "clang/Analysis/Analyses/LifetimeSafety/Utils.h"
 #include "clang/Analysis/AnalysisDeclContext.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Support/raw_ostream.h"
 
 namespace clang::lifetimes::internal {
@@ -66,47 +67,71 @@ struct Origin {
   }
 };
 
-/// A list of origins representing levels of indirection for pointer-like 
types.
+/// A tree of origins representing the structure of a pointer-like or
+/// record type.
 ///
-/// Each node in the list contains an OriginID representing a level of
-/// indirection. The list structure captures the multi-level nature of
-/// pointer and reference types in the lifetime analysis.
+/// Each node carries an OriginID and is connected to children via labeled
+/// edges: either a pointee edge (one level of pointer/reference indirection)
+/// or a field edge (a named field of a record). Pointer-like types form a
+/// pointee chain; record types fan out via field edges.
 ///
 /// Examples:
-///   - For `int& x`, the list has size 2:
+///   - For `int& x`, the chain has length 2:
 ///     * Outer: origin for the reference storage itself (the lvalue `x`)
 ///     * Inner: origin for what `x` refers to
 ///
-///   - For `int* p`, the list has size 2:
+///   - For `int* p`, the chain has length 2:
 ///     * Outer: origin for the pointer variable `p`
 ///     * Inner: origin for what `p` points to
 ///
-///   - For `View v` (where View is gsl::Pointer), the list has size 2:
+///   - For `View v` (where View is gsl::Pointer), the chain has length 2:
 ///     * Outer: origin for the view object itself
 ///     * Inner: origin for what the view refers to
 ///
-///   - For `int** pp`, the list has size 3:
+///   - For `int** pp`, the chain has length 3:
 ///     * Outer: origin for `pp` itself
 ///     * Inner: origin for `*pp` (what `pp` points to)
 ///     * Inner->Inner: origin for `**pp` (what `*pp` points to)
 ///
-/// The list structure enables the analysis to track how loans flow through
-/// different levels of indirection when assignments and dereferences occur.
-///
-/// TODO: Currently list-shaped (each node has at most one pointee child).
-/// Will become tree-shaped once field children are added to support
-/// origin trees for records whose fields have origins.
+/// The structure enables the analysis to track how loans flow through
+/// levels of indirection and across record fields when assignments and
+/// dereferences occur.
 class OriginNode {
 public:
+  /// A labeled edge from this node to a child. The label distinguishes how
+  /// the child is reached: a null `FD` means a pointee edge (one level of
+  /// pointer/reference indirection); a non-null `FD` means a field edge
+  /// (the named field of a record). Putting the label on the edge lets
+  /// one child node play different roles per parent. For example, the subtree
+  /// for `s`'s `v` field is reached from `s`'s record (FD=v) and from
+  /// the lvalue outer built for the MemberExpr `s.v` (FD=null).
+  struct Edge {
+    const FieldDecl *FD;
+    OriginNode *Child;
+  };
+
   OriginNode(OriginID OID) : OID(OID) {}
 
+  OriginID getOriginID() const { return OID; }
+
+  llvm::ArrayRef<Edge> children() const { return Children; }
+
   OriginNode *getPointeeChild() const {
-    return Children.empty() ? nullptr : Children[0];
+    for (const Edge &E : Children)
+      if (!E.FD)
+        return E.Child;
+    return nullptr;
   }
 
-  OriginID getOriginID() const { return OID; }
+  OriginNode *getFieldChild(const FieldDecl *F) const {
+    assert(F);
+    for (const Edge &E : Children)
+      if (E.FD == F)
+        return E.Child;
+    return nullptr;
+  }
 
-  void setChildren(llvm::ArrayRef<OriginNode *> NewChildren) {
+  void setChildren(llvm::ArrayRef<Edge> NewChildren) {
     assert(Children.empty() && "children must be set at most once");
     Children = NewChildren;
   }
@@ -125,7 +150,7 @@ class OriginNode {
 
 private:
   OriginID OID;
-  llvm::ArrayRef<OriginNode *> Children;
+  llvm::ArrayRef<Edge> Children;
 };
 
 bool doesDeclHaveStorage(const ValueDecl *D);
@@ -138,18 +163,19 @@ class OriginManager {
 
   /// Gets or creates the OriginNode for a given ValueDecl.
   ///
-  /// Creates a list structure mirroring the levels of indirection in the
-  /// declaration's type (e.g., `int** p` creates list of size 2).
+  /// Creates a tree structure mirroring the levels of indirection in the
+  /// declaration's type (e.g., `int* p` creates a chain of length 2).
   ///
   /// \returns The OriginNode, or nullptr if the type is not pointer-like.
   OriginNode *getOrCreateNode(const ValueDecl *D);
 
   /// Gets or creates the OriginNode for a given Expr.
   ///
-  /// Creates a list based on the expression's type and value category:
+  /// Creates a tree structure based on the expression's type and value
+  /// category:
   /// - Lvalues get an implicit reference level (modeling addressability)
   /// - Rvalues of non-pointer type return nullptr (no trackable origin)
-  /// - DeclRefExpr may reuse the underlying declaration's list
+  /// - DeclRefExpr may reuse the underlying declaration's tree
   ///
   /// \returns The OriginNode, or nullptr for non-pointer rvalues.
   OriginNode *getOrCreateNode(const Expr *E);
@@ -183,9 +209,18 @@ class OriginManager {
   OriginNode *createNode(const Expr *E, QualType QT);
 
   void attachPointeeChild(OriginNode *Parent, OriginNode *Pointee);
+  void attachChildren(OriginNode *Parent,
+                      llvm::ArrayRef<OriginNode::Edge> Children);
 
   template <typename T>
   OriginNode *buildNodeForType(QualType QT, const T *Node);
+  template <typename T>
+  OriginNode *buildNodeForTypeImpl(QualType QT, const T *Node,
+                                   llvm::SmallPtrSet<const Type *, 4> 
&Visited);
+
+  /// Whether a record field participates in origin tracking. Plain records
+  /// only track public fields; lambdas track all fields.
+  bool isTrackedField(const CXXRecordDecl *RD, const FieldDecl *FD) const;
 
   void initializeThisOrigins(const Decl *D);
 
diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp 
b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
index d04d3e9202952..25458f6ef9b54 100644
--- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
@@ -82,6 +82,7 @@ void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &,
   OS << "Use (";
   size_t NumUsedOrigins = getUsedOrigins()->getLength();
   size_t I = 0;
+  // TODO: pointee-chain only; extend to field children.
   for (const OriginNode *Cur = getUsedOrigins(); Cur;
        Cur = Cur->getPointeeChild(), ++I) {
     OM.dump(Cur->getOriginID(), OS);
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp 
b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 24bf706b62ee3..3a6eb525ae6ee 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -49,9 +49,13 @@ bool FactsGenerator::hasOrigins(const Expr *E) const {
 /// Propagates origin information from Src to Dst through all levels of
 /// indirection, creating OriginFlowFacts at each level.
 ///
-/// This function enforces a critical type-safety invariant: both lists must
-/// have the same shape (same depth/structure). This invariant ensures that
-/// origins flow only between compatible types during expression evaluation.
+/// This function enforces a critical type-safety invariant: both trees
+/// must have the same pointee-chain depth, and field children are
+/// matched by `FieldDecl`. This invariant ensures that origins flow only
+/// between compatible types during expression evaluation. Field pairs
+/// found on both sides recurse; unmatched fields are skipped, which is
+/// exercised by `CK_DerivedToBase` flows where Base's and Derived's
+/// trees carry distinct direct-field FDs.
 ///
 /// Examples:
 ///   - `int* p = &x;` flows origins from `&x` (depth 1) to `p` (depth 1)
@@ -59,17 +63,24 @@ bool FactsGenerator::hasOrigins(const Expr *E) const {
 ///     * Level 1: pp <- p's address
 ///     * Level 2: (*pp) <- what p points to (i.e., &x)
 ///   - `View v = obj;` flows origins from `obj` (depth 1) to `v` (depth 1)
+///   - `S s2 = s;` flows the top-level origin and recursively flows each
+///     matching `FieldDecl` subtree, so loans on `s.v.inner` propagate to
+///     `s2.v.inner`.
 void FactsGenerator::flow(OriginNode *Dst, OriginNode *Src, bool Kill) {
   if (!Dst)
     return;
   assert(Src &&
          "Dst is non-null but Src is null. List must have the same length");
   assert(Dst->getLength() == Src->getLength() &&
-         "Lists must have the same length");
+         "Pointee chains must have the same length");
 
   while (Dst && Src) {
     CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>(
         Dst->getOriginID(), Src->getOriginID(), Kill));
+    for (const OriginNode::Edge &E : Dst->children())
+      if (E.FD)
+        if (OriginNode *SrcF = Src->getFieldChild(E.FD))
+          flow(E.Child, SrcF, Kill);
     Dst = Dst->getPointeeChild();
     Src = Src->getPointeeChild();
   }
@@ -273,6 +284,19 @@ void FactsGenerator::VisitMemberExpr(const MemberExpr *ME) 
{
     CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>(
         Dst->getOriginID(), Src->getOriginID(),
         /*Kill=*/true));
+
+    // Narrow the UseFact's liveness coverage to the accessed field's
+    // subtree.
+    //
+    // E.g., for `(void)s.inner`, without narrowing, the UseFact at `s`
+    // would keep `s.v`'s subtree live and falsely flag a UAF when a loan
+    // held by `s.v` has already expired.
+    if (UseFact *UF = UseFacts.lookup(ME->getBase())) {
+      assert(!UseFacts.contains(ME) && "ME already has a UseFact");
+      UF->setUsedOrigins(Dst);
+      UseFacts[ME] = UF;
+      UseFacts.erase(ME->getBase());
+    }
   }
 }
 
@@ -359,13 +383,18 @@ void FactsGenerator::VisitUnaryOperator(const 
UnaryOperator *UO) {
   }
 }
 
+void FactsGenerator::emitReturnEscapes(OriginNode *N, const Expr *RetExpr) {
+  if (!N)
+    return;
+  EscapesInCurrentBlock.push_back(
+      FactMgr.createFact<ReturnEscapeFact>(N->getOriginID(), RetExpr));
+  for (const OriginNode::Edge &E : N->children())
+    emitReturnEscapes(E.Child, RetExpr);
+}
+
 void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) {
-  if (const Expr *RetExpr = RS->getRetValue()) {
-    if (OriginNode *Node = getOriginNode(*RetExpr))
-      for (OriginNode *L = Node; L != nullptr; L = L->getPointeeChild())
-        EscapesInCurrentBlock.push_back(
-            FactMgr.createFact<ReturnEscapeFact>(L->getOriginID(), RetExpr));
-  }
+  if (const Expr *RetExpr = RS->getRetValue())
+    emitReturnEscapes(getOriginNode(*RetExpr), RetExpr);
 }
 
 void FactsGenerator::handleAssignment(const Expr *LHSExpr,
diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp 
b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
index a23d7f224b4bc..c1f82796a601e 100644
--- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
@@ -127,21 +127,30 @@ class AnalysisImpl
   /// A read operation makes the origin live with definite confidence, as it
   /// dominates this program point. A write operation kills the liveness of
   /// the origin since it overwrites the value.
+  ///
+  /// Walks the full subtree so loans held by any descendant (pointee
+  /// chain or field child) become visible at the use site.
   Lattice transfer(Lattice In, const UseFact &UF) {
+    return transferUseSubtree(In, UF, UF.getUsedOrigins());
+  }
+
+  Lattice transferUseSubtree(Lattice In, const UseFact &UF,
+                             const OriginNode *Cur) {
+    if (!Cur)
+      return In;
+    OriginID OID = Cur->getOriginID();
     Lattice Out = In;
-    for (const OriginNode *Cur = UF.getUsedOrigins(); Cur;
-         Cur = Cur->getPointeeChild()) {
-      OriginID OID = Cur->getOriginID();
-      // Write kills liveness.
-      if (UF.isWritten()) {
-        Out = Lattice(Factory.remove(Out.LiveOrigins, OID));
-      } else {
-        // Read makes origin live with definite confidence (dominates this
-        // point).
-        Out = Lattice(Factory.add(Out.LiveOrigins, OID,
-                                  LivenessInfo(&UF, LivenessKind::Must)));
-      }
+    // Write kills liveness.
+    if (UF.isWritten()) {
+      Out = Lattice(Factory.remove(Out.LiveOrigins, OID));
+    } else {
+      // Read makes origin live with definite confidence (dominates this
+      // point).
+      Out = Lattice(Factory.add(Out.LiveOrigins, OID,
+                                LivenessInfo(&UF, LivenessKind::Must)));
     }
+    for (const OriginNode::Edge &E : Cur->children())
+      Out = transferUseSubtree(Out, UF, E.Child);
     return Out;
   }
 
diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp 
b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
index 41d3adaaae59f..bdfe5b097af81 100644
--- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
@@ -112,16 +112,21 @@ bool OriginManager::hasOrigins(QualType QT) const {
   // stored lambda's origins.
   if (isStdCallableWrapperType(RD))
     return true;
-  // TODO: Limit to lambdas for now. This will be extended to user-defined
-  // structs with pointer-like fields.
-  if (!RD->isLambda())
+  // TODO: Unions are not tracked.
+  if (RD->isUnion())
     return false;
   for (const auto *FD : RD->fields())
-    if (hasOrigins(FD->getType()))
+    if (isTrackedField(RD, FD))
       return true;
   return false;
 }
 
+bool OriginManager::isTrackedField(const CXXRecordDecl *RD,
+                                   const FieldDecl *FD) const {
+  return (RD->isLambda() || FD->getAccess() == AS_public) &&
+         hasOrigins(FD->getType());
+}
+
 /// Determines if an expression has origins that need to be tracked.
 ///
 /// An expression has origins if:
@@ -193,13 +198,40 @@ OriginNode 
*OriginManager::createSingleOriginNode(OriginID OID) {
 void OriginManager::attachPointeeChild(OriginNode *Parent,
                                        OriginNode *Pointee) {
   assert(Pointee && "pointee subtree must be non-null");
-  Parent->setChildren(
-      {new (Allocator.Allocate<OriginNode *>()) OriginNode *(Pointee), 1});
+  auto *E = new (Allocator.Allocate<OriginNode::Edge>())
+      OriginNode::Edge{nullptr, Pointee};
+  Parent->setChildren({E, 1});
+}
+
+void OriginManager::attachChildren(OriginNode *Parent,
+                                   llvm::ArrayRef<OriginNode::Edge> Children) {
+  Parent->setChildren(Children.copy(Allocator));
 }
 
 template <typename T>
 OriginNode *OriginManager::buildNodeForType(QualType QT, const T *Node) {
-  assert(hasOrigins(QT) && "buildNodeForType called for non-pointer type");
+  llvm::SmallPtrSet<const Type *, 4> Visited;
+  return buildNodeForTypeImpl(QT, Node, Visited);
+}
+
+template <typename T>
+OriginNode *OriginManager::buildNodeForTypeImpl(
+    QualType QT, const T *Node, llvm::SmallPtrSet<const Type *, 4> &Visited) {
+  assert(hasOrigins(QT) && "buildNodeForType called for type without origins");
+
+  const auto *RD = QT->getAsCXXRecordDecl();
+  const Type *Canonical = QT.getCanonicalType().getTypePtr();
+  // Cycle cut: only records enter Visited; re-entering one returns a
+  // leaf to stop descending further. Loans landing on the cut leaf are
+  // dropped (e.g., through `n->next->next`).
+  //
+  // Pointer/reference types stay transparent: including them in Visited
+  // would make the same record's shape depend on the entry path. E.g.,
+  // Node's Sub_next would have length 2 from a Node start but length 1
+  // from a Node* start, breaking flow's length assertion.
+  if (RD && !Visited.insert(Canonical).second)
+    return createNode(Node, QT);
+
   OriginNode *Head = createNode(Node, QT);
 
   if (QT->isPointerOrReferenceType()) {
@@ -207,8 +239,24 @@ OriginNode *OriginManager::buildNodeForType(QualType QT, 
const T *Node) {
     // We recurse if the pointee type is pointer-like, to build the next
     // level in the origin tree. E.g., for T*& / View&.
     if (hasOrigins(PointeeTy))
-      attachPointeeChild(Head, buildNodeForType(PointeeTy, Node));
+      attachPointeeChild(Head, buildNodeForTypeImpl(PointeeTy, Node, Visited));
+  } else if (RD) {
+    bool shouldExpandFields =
+        !(isGslPointerType(QT) || isStdCallableWrapperType(RD) ||
+          LifetimeAnnotatedOriginTypes.contains(Canonical));
+    if (shouldExpandFields) {
+      SmallVector<OriginNode::Edge, 4> FieldChildren;
+      for (const FieldDecl *F : RD->fields())
+        if (isTrackedField(RD, F)) {
+          OriginNode *Sub = buildNodeForTypeImpl(F->getType(), Node, Visited);
+          FieldChildren.push_back({F, Sub});
+        }
+      attachChildren(Head, FieldChildren);
+    }
   }
+
+  if (RD)
+    Visited.erase(Canonical);
   return Head;
 }
 
@@ -273,6 +321,32 @@ OriginNode *OriginManager::getOrCreateNode(const Expr *E) {
     return ExprToNode[E] = Head;
   }
 
+  // For a MemberExpr whose base is not `this` (handled above), look up the
+  // field child in the base's per-instance origin tree. This makes loans
+  // flowing into one occurrence of `s.v` visible at later occurrences.
+  if (auto *ME = dyn_cast<MemberExpr>(E))
+    if (auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
+      // E.g. `p->v` walks two hops: DRE outer, then pointer indirection.
+      for (OriginNode *N =
+               getOrCreateNode(ME->getBase()->IgnoreParenImpCasts());
+           N; N = N->getPointeeChild())
+        if (OriginNode *Sub = N->getFieldChild(FD)) {
+          // For non-reference fields (e.g., `View v;` in a record), the
+          // MemberExpr `s.v` is an lvalue (addressable) that can be
+          // borrowed, so we create an outer origin for the lvalue itself,
+          // with the pointee being the field's shared subtree. `&s.v` borrows
+          // the storage of the v-slot in s, not what v refers to.
+          if (doesDeclHaveStorage(FD)) {
+            OriginNode *Outer = createNode(E, QualType{});
+            attachPointeeChild(Outer, Sub);
+            return ExprToNode[E] = Outer;
+          }
+          // For reference-typed fields (e.g., `int& r;` in a record) which
+          // have no storage, the MemberExpr `s.r` directly reuses the
+          // field's subtree.
+          return ExprToNode[E] = Sub;
+        }
+
   // If E is an lvalue , it refers to storage. We model this storage as the
   // first level of origin list, as if it were a reference, because l-values 
are
   // addressable.
diff --git a/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp 
b/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
index 5a4de105f217d..ac101f8e9bc13 100644
--- a/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
@@ -162,27 +162,25 @@ struct MemberSetters {
   }
 };
 
-// FIXME: Detect escape to field of field.
 struct IndirectEscape{
   struct {
     const char *p;
-  } b;
+  } b; // expected-note {{this field dangles}}
   
   void foo() {
     std::string s;
-    b.p = s.data();
+    b.p = s.data(); // expected-warning {{address of stack memory escapes to a 
field}}
   }
 };
 
-// FIXME: Dete...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/195603
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits

Reply via email to