llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-clang-analysis
Author: Zhijie Wang (aeft)
<details>
<summary>Changes</summary>
Extend origin trees with field-labeled edges so loans through record fields can
be tracked.
- `OriginNode::Edge` carries a `FieldDecl *` label; the tree fans out across a
record's tracked fields (lambda fields and public fields).
- `flow()` recurses on field children matched by `FieldDecl`.
- Cycle cut at record nodes to bound self-referential types like `struct Node {
Node *next; }`.
#<!-- -->184344
---
Patch is 26.88 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/195603.diff
8 Files Affected:
- (modified)
clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h (+4)
- (modified) clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
(+57-22)
- (modified) clang/lib/Analysis/LifetimeSafety/Facts.cpp (+1)
- (modified) clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp (+39-10)
- (modified) clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp (+21-12)
- (modified) clang/lib/Analysis/LifetimeSafety/Origins.cpp (+82-8)
- (modified) clang/test/Sema/warn-lifetime-safety-dangling-field.cpp (+4-6)
- (modified) clang/test/Sema/warn-lifetime-safety.cpp (+230-8)
``````````diff
diff --git
a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
index b5185ba815b00..9f53df4735e53 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h
@@ -125,6 +125,10 @@ class FactsGenerator : public
ConstStmtVisitor<FactsGenerator> {
void markUseAsWrite(const DeclRefExpr *DRE);
+ /// Walks the full subtree so origins on the pointee chain and on field
+ /// children both escape with the returned value.
+ void emitReturnEscapes(OriginNode *N, const Expr *RetExpr);
+
bool escapesViaReturn(OriginID OID) const;
llvm::SmallVector<Fact *> issuePlaceholderLoans();
diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
index cacefc8aa62ad..5ff64d1b33784 100644
--- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
+++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h
@@ -21,6 +21,7 @@
#include "clang/Analysis/Analyses/LifetimeSafety/LifetimeStats.h"
#include "clang/Analysis/Analyses/LifetimeSafety/Utils.h"
#include "clang/Analysis/AnalysisDeclContext.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/raw_ostream.h"
namespace clang::lifetimes::internal {
@@ -66,47 +67,71 @@ struct Origin {
}
};
-/// A list of origins representing levels of indirection for pointer-like
types.
+/// A tree of origins representing the structure of a pointer-like or
+/// record type.
///
-/// Each node in the list contains an OriginID representing a level of
-/// indirection. The list structure captures the multi-level nature of
-/// pointer and reference types in the lifetime analysis.
+/// Each node carries an OriginID and is connected to children via labeled
+/// edges: either a pointee edge (one level of pointer/reference indirection)
+/// or a field edge (a named field of a record). Pointer-like types form a
+/// pointee chain; record types fan out via field edges.
///
/// Examples:
-/// - For `int& x`, the list has size 2:
+/// - For `int& x`, the chain has length 2:
/// * Outer: origin for the reference storage itself (the lvalue `x`)
/// * Inner: origin for what `x` refers to
///
-/// - For `int* p`, the list has size 2:
+/// - For `int* p`, the chain has length 2:
/// * Outer: origin for the pointer variable `p`
/// * Inner: origin for what `p` points to
///
-/// - For `View v` (where View is gsl::Pointer), the list has size 2:
+/// - For `View v` (where View is gsl::Pointer), the chain has length 2:
/// * Outer: origin for the view object itself
/// * Inner: origin for what the view refers to
///
-/// - For `int** pp`, the list has size 3:
+/// - For `int** pp`, the chain has length 3:
/// * Outer: origin for `pp` itself
/// * Inner: origin for `*pp` (what `pp` points to)
/// * Inner->Inner: origin for `**pp` (what `*pp` points to)
///
-/// The list structure enables the analysis to track how loans flow through
-/// different levels of indirection when assignments and dereferences occur.
-///
-/// TODO: Currently list-shaped (each node has at most one pointee child).
-/// Will become tree-shaped once field children are added to support
-/// origin trees for records whose fields have origins.
+/// The structure enables the analysis to track how loans flow through
+/// levels of indirection and across record fields when assignments and
+/// dereferences occur.
class OriginNode {
public:
+ /// A labeled edge from this node to a child. The label distinguishes how
+ /// the child is reached: a null `FD` means a pointee edge (one level of
+ /// pointer/reference indirection); a non-null `FD` means a field edge
+ /// (the named field of a record). Putting the label on the edge lets
+ /// one child node play different roles per parent. For example, the subtree
+ /// for `s`'s `v` field is reached from `s`'s record (FD=v) and from
+ /// the lvalue outer built for the MemberExpr `s.v` (FD=null).
+ struct Edge {
+ const FieldDecl *FD;
+ OriginNode *Child;
+ };
+
OriginNode(OriginID OID) : OID(OID) {}
+ OriginID getOriginID() const { return OID; }
+
+ llvm::ArrayRef<Edge> children() const { return Children; }
+
OriginNode *getPointeeChild() const {
- return Children.empty() ? nullptr : Children[0];
+ for (const Edge &E : Children)
+ if (!E.FD)
+ return E.Child;
+ return nullptr;
}
- OriginID getOriginID() const { return OID; }
+ OriginNode *getFieldChild(const FieldDecl *F) const {
+ assert(F);
+ for (const Edge &E : Children)
+ if (E.FD == F)
+ return E.Child;
+ return nullptr;
+ }
- void setChildren(llvm::ArrayRef<OriginNode *> NewChildren) {
+ void setChildren(llvm::ArrayRef<Edge> NewChildren) {
assert(Children.empty() && "children must be set at most once");
Children = NewChildren;
}
@@ -125,7 +150,7 @@ class OriginNode {
private:
OriginID OID;
- llvm::ArrayRef<OriginNode *> Children;
+ llvm::ArrayRef<Edge> Children;
};
bool doesDeclHaveStorage(const ValueDecl *D);
@@ -138,18 +163,19 @@ class OriginManager {
/// Gets or creates the OriginNode for a given ValueDecl.
///
- /// Creates a list structure mirroring the levels of indirection in the
- /// declaration's type (e.g., `int** p` creates list of size 2).
+ /// Creates a tree structure mirroring the levels of indirection in the
+ /// declaration's type (e.g., `int* p` creates a chain of length 2).
///
/// \returns The OriginNode, or nullptr if the type is not pointer-like.
OriginNode *getOrCreateNode(const ValueDecl *D);
/// Gets or creates the OriginNode for a given Expr.
///
- /// Creates a list based on the expression's type and value category:
+ /// Creates a tree structure based on the expression's type and value
+ /// category:
/// - Lvalues get an implicit reference level (modeling addressability)
/// - Rvalues of non-pointer type return nullptr (no trackable origin)
- /// - DeclRefExpr may reuse the underlying declaration's list
+ /// - DeclRefExpr may reuse the underlying declaration's tree
///
/// \returns The OriginNode, or nullptr for non-pointer rvalues.
OriginNode *getOrCreateNode(const Expr *E);
@@ -183,9 +209,18 @@ class OriginManager {
OriginNode *createNode(const Expr *E, QualType QT);
void attachPointeeChild(OriginNode *Parent, OriginNode *Pointee);
+ void attachChildren(OriginNode *Parent,
+ llvm::ArrayRef<OriginNode::Edge> Children);
template <typename T>
OriginNode *buildNodeForType(QualType QT, const T *Node);
+ template <typename T>
+ OriginNode *buildNodeForTypeImpl(QualType QT, const T *Node,
+ llvm::SmallPtrSet<const Type *, 4>
&Visited);
+
+ /// Whether a record field participates in origin tracking. Plain records
+ /// only track public fields; lambdas track all fields.
+ bool isTrackedField(const CXXRecordDecl *RD, const FieldDecl *FD) const;
void initializeThisOrigins(const Decl *D);
diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp
b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
index d04d3e9202952..25458f6ef9b54 100644
--- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp
@@ -82,6 +82,7 @@ void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &,
OS << "Use (";
size_t NumUsedOrigins = getUsedOrigins()->getLength();
size_t I = 0;
+ // TODO: pointee-chain only; extend to field children.
for (const OriginNode *Cur = getUsedOrigins(); Cur;
Cur = Cur->getPointeeChild(), ++I) {
OM.dump(Cur->getOriginID(), OS);
diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
index 24bf706b62ee3..3a6eb525ae6ee 100644
--- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp
@@ -49,9 +49,13 @@ bool FactsGenerator::hasOrigins(const Expr *E) const {
/// Propagates origin information from Src to Dst through all levels of
/// indirection, creating OriginFlowFacts at each level.
///
-/// This function enforces a critical type-safety invariant: both lists must
-/// have the same shape (same depth/structure). This invariant ensures that
-/// origins flow only between compatible types during expression evaluation.
+/// This function enforces a critical type-safety invariant: both trees
+/// must have the same pointee-chain depth, and field children are
+/// matched by `FieldDecl`. This invariant ensures that origins flow only
+/// between compatible types during expression evaluation. Field pairs
+/// found on both sides recurse; unmatched fields are skipped, which is
+/// exercised by `CK_DerivedToBase` flows where Base's and Derived's
+/// trees carry distinct direct-field FDs.
///
/// Examples:
/// - `int* p = &x;` flows origins from `&x` (depth 1) to `p` (depth 1)
@@ -59,17 +63,24 @@ bool FactsGenerator::hasOrigins(const Expr *E) const {
/// * Level 1: pp <- p's address
/// * Level 2: (*pp) <- what p points to (i.e., &x)
/// - `View v = obj;` flows origins from `obj` (depth 1) to `v` (depth 1)
+/// - `S s2 = s;` flows the top-level origin and recursively flows each
+/// matching `FieldDecl` subtree, so loans on `s.v.inner` propagate to
+/// `s2.v.inner`.
void FactsGenerator::flow(OriginNode *Dst, OriginNode *Src, bool Kill) {
if (!Dst)
return;
assert(Src &&
"Dst is non-null but Src is null. List must have the same length");
assert(Dst->getLength() == Src->getLength() &&
- "Lists must have the same length");
+ "Pointee chains must have the same length");
while (Dst && Src) {
CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>(
Dst->getOriginID(), Src->getOriginID(), Kill));
+ for (const OriginNode::Edge &E : Dst->children())
+ if (E.FD)
+ if (OriginNode *SrcF = Src->getFieldChild(E.FD))
+ flow(E.Child, SrcF, Kill);
Dst = Dst->getPointeeChild();
Src = Src->getPointeeChild();
}
@@ -273,6 +284,19 @@ void FactsGenerator::VisitMemberExpr(const MemberExpr *ME)
{
CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>(
Dst->getOriginID(), Src->getOriginID(),
/*Kill=*/true));
+
+ // Narrow the UseFact's liveness coverage to the accessed field's
+ // subtree.
+ //
+ // E.g., for `(void)s.inner`, without narrowing, the UseFact at `s`
+ // would keep `s.v`'s subtree live and falsely flag a UAF when a loan
+ // held by `s.v` has already expired.
+ if (UseFact *UF = UseFacts.lookup(ME->getBase())) {
+ assert(!UseFacts.contains(ME) && "ME already has a UseFact");
+ UF->setUsedOrigins(Dst);
+ UseFacts[ME] = UF;
+ UseFacts.erase(ME->getBase());
+ }
}
}
@@ -359,13 +383,18 @@ void FactsGenerator::VisitUnaryOperator(const
UnaryOperator *UO) {
}
}
+void FactsGenerator::emitReturnEscapes(OriginNode *N, const Expr *RetExpr) {
+ if (!N)
+ return;
+ EscapesInCurrentBlock.push_back(
+ FactMgr.createFact<ReturnEscapeFact>(N->getOriginID(), RetExpr));
+ for (const OriginNode::Edge &E : N->children())
+ emitReturnEscapes(E.Child, RetExpr);
+}
+
void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) {
- if (const Expr *RetExpr = RS->getRetValue()) {
- if (OriginNode *Node = getOriginNode(*RetExpr))
- for (OriginNode *L = Node; L != nullptr; L = L->getPointeeChild())
- EscapesInCurrentBlock.push_back(
- FactMgr.createFact<ReturnEscapeFact>(L->getOriginID(), RetExpr));
- }
+ if (const Expr *RetExpr = RS->getRetValue())
+ emitReturnEscapes(getOriginNode(*RetExpr), RetExpr);
}
void FactsGenerator::handleAssignment(const Expr *LHSExpr,
diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
index a23d7f224b4bc..c1f82796a601e 100644
--- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp
@@ -127,21 +127,30 @@ class AnalysisImpl
/// A read operation makes the origin live with definite confidence, as it
/// dominates this program point. A write operation kills the liveness of
/// the origin since it overwrites the value.
+ ///
+ /// Walks the full subtree so loans held by any descendant (pointee
+ /// chain or field child) become visible at the use site.
Lattice transfer(Lattice In, const UseFact &UF) {
+ return transferUseSubtree(In, UF, UF.getUsedOrigins());
+ }
+
+ Lattice transferUseSubtree(Lattice In, const UseFact &UF,
+ const OriginNode *Cur) {
+ if (!Cur)
+ return In;
+ OriginID OID = Cur->getOriginID();
Lattice Out = In;
- for (const OriginNode *Cur = UF.getUsedOrigins(); Cur;
- Cur = Cur->getPointeeChild()) {
- OriginID OID = Cur->getOriginID();
- // Write kills liveness.
- if (UF.isWritten()) {
- Out = Lattice(Factory.remove(Out.LiveOrigins, OID));
- } else {
- // Read makes origin live with definite confidence (dominates this
- // point).
- Out = Lattice(Factory.add(Out.LiveOrigins, OID,
- LivenessInfo(&UF, LivenessKind::Must)));
- }
+ // Write kills liveness.
+ if (UF.isWritten()) {
+ Out = Lattice(Factory.remove(Out.LiveOrigins, OID));
+ } else {
+ // Read makes origin live with definite confidence (dominates this
+ // point).
+ Out = Lattice(Factory.add(Out.LiveOrigins, OID,
+ LivenessInfo(&UF, LivenessKind::Must)));
}
+ for (const OriginNode::Edge &E : Cur->children())
+ Out = transferUseSubtree(Out, UF, E.Child);
return Out;
}
diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp
b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
index 41d3adaaae59f..bdfe5b097af81 100644
--- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp
+++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp
@@ -112,16 +112,21 @@ bool OriginManager::hasOrigins(QualType QT) const {
// stored lambda's origins.
if (isStdCallableWrapperType(RD))
return true;
- // TODO: Limit to lambdas for now. This will be extended to user-defined
- // structs with pointer-like fields.
- if (!RD->isLambda())
+ // TODO: Unions are not tracked.
+ if (RD->isUnion())
return false;
for (const auto *FD : RD->fields())
- if (hasOrigins(FD->getType()))
+ if (isTrackedField(RD, FD))
return true;
return false;
}
+bool OriginManager::isTrackedField(const CXXRecordDecl *RD,
+ const FieldDecl *FD) const {
+ return (RD->isLambda() || FD->getAccess() == AS_public) &&
+ hasOrigins(FD->getType());
+}
+
/// Determines if an expression has origins that need to be tracked.
///
/// An expression has origins if:
@@ -193,13 +198,40 @@ OriginNode
*OriginManager::createSingleOriginNode(OriginID OID) {
void OriginManager::attachPointeeChild(OriginNode *Parent,
OriginNode *Pointee) {
assert(Pointee && "pointee subtree must be non-null");
- Parent->setChildren(
- {new (Allocator.Allocate<OriginNode *>()) OriginNode *(Pointee), 1});
+ auto *E = new (Allocator.Allocate<OriginNode::Edge>())
+ OriginNode::Edge{nullptr, Pointee};
+ Parent->setChildren({E, 1});
+}
+
+void OriginManager::attachChildren(OriginNode *Parent,
+ llvm::ArrayRef<OriginNode::Edge> Children) {
+ Parent->setChildren(Children.copy(Allocator));
}
template <typename T>
OriginNode *OriginManager::buildNodeForType(QualType QT, const T *Node) {
- assert(hasOrigins(QT) && "buildNodeForType called for non-pointer type");
+ llvm::SmallPtrSet<const Type *, 4> Visited;
+ return buildNodeForTypeImpl(QT, Node, Visited);
+}
+
+template <typename T>
+OriginNode *OriginManager::buildNodeForTypeImpl(
+ QualType QT, const T *Node, llvm::SmallPtrSet<const Type *, 4> &Visited) {
+ assert(hasOrigins(QT) && "buildNodeForType called for type without origins");
+
+ const auto *RD = QT->getAsCXXRecordDecl();
+ const Type *Canonical = QT.getCanonicalType().getTypePtr();
+ // Cycle cut: only records enter Visited; re-entering one returns a
+ // leaf to stop descending further. Loans landing on the cut leaf are
+ // dropped (e.g., through `n->next->next`).
+ //
+ // Pointer/reference types stay transparent: including them in Visited
+ // would make the same record's shape depend on the entry path. E.g.,
+ // Node's Sub_next would have length 2 from a Node start but length 1
+ // from a Node* start, breaking flow's length assertion.
+ if (RD && !Visited.insert(Canonical).second)
+ return createNode(Node, QT);
+
OriginNode *Head = createNode(Node, QT);
if (QT->isPointerOrReferenceType()) {
@@ -207,8 +239,24 @@ OriginNode *OriginManager::buildNodeForType(QualType QT,
const T *Node) {
// We recurse if the pointee type is pointer-like, to build the next
// level in the origin tree. E.g., for T*& / View&.
if (hasOrigins(PointeeTy))
- attachPointeeChild(Head, buildNodeForType(PointeeTy, Node));
+ attachPointeeChild(Head, buildNodeForTypeImpl(PointeeTy, Node, Visited));
+ } else if (RD) {
+ bool shouldExpandFields =
+ !(isGslPointerType(QT) || isStdCallableWrapperType(RD) ||
+ LifetimeAnnotatedOriginTypes.contains(Canonical));
+ if (shouldExpandFields) {
+ SmallVector<OriginNode::Edge, 4> FieldChildren;
+ for (const FieldDecl *F : RD->fields())
+ if (isTrackedField(RD, F)) {
+ OriginNode *Sub = buildNodeForTypeImpl(F->getType(), Node, Visited);
+ FieldChildren.push_back({F, Sub});
+ }
+ attachChildren(Head, FieldChildren);
+ }
}
+
+ if (RD)
+ Visited.erase(Canonical);
return Head;
}
@@ -273,6 +321,32 @@ OriginNode *OriginManager::getOrCreateNode(const Expr *E) {
return ExprToNode[E] = Head;
}
+ // For a MemberExpr whose base is not `this` (handled above), look up the
+ // field child in the base's per-instance origin tree. This makes loans
+ // flowing into one occurrence of `s.v` visible at later occurrences.
+ if (auto *ME = dyn_cast<MemberExpr>(E))
+ if (auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
+ // E.g. `p->v` walks two hops: DRE outer, then pointer indirection.
+ for (OriginNode *N =
+ getOrCreateNode(ME->getBase()->IgnoreParenImpCasts());
+ N; N = N->getPointeeChild())
+ if (OriginNode *Sub = N->getFieldChild(FD)) {
+ // For non-reference fields (e.g., `View v;` in a record), the
+ // MemberExpr `s.v` is an lvalue (addressable) that can be
+ // borrowed, so we create an outer origin for the lvalue itself,
+ // with the pointee being the field's shared subtree. `&s.v` borrows
+ // the storage of the v-slot in s, not what v refers to.
+ if (doesDeclHaveStorage(FD)) {
+ OriginNode *Outer = createNode(E, QualType{});
+ attachPointeeChild(Outer, Sub);
+ return ExprToNode[E] = Outer;
+ }
+ // For reference-typed fields (e.g., `int& r;` in a record) which
+ // have no storage, the MemberExpr `s.r` directly reuses the
+ // field's subtree.
+ return ExprToNode[E] = Sub;
+ }
+
// If E is an lvalue , it refers to storage. We model this storage as the
// first level of origin list, as if it were a reference, because l-values
are
// addressable.
diff --git a/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
b/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
index 5a4de105f217d..ac101f8e9bc13 100644
--- a/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
+++ b/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp
@@ -162,27 +162,25 @@ struct MemberSetters {
}
};
-// FIXME: Detect escape to field of field.
struct IndirectEscape{
struct {
const char *p;
- } b;
+ } b; // expected-note {{this field dangles}}
void foo() {
std::string s;
- b.p = s.data();
+ b.p = s.data(); // expected-warning {{address of stack memory escapes to a
field}}
}
};
-// FIXME: Dete...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/195603
_______________________________________________
llvm-branch-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits