https://github.com/aeft updated https://github.com/llvm/llvm-project/pull/195603
>From d47012f6239223e75a60409c02c2185e7650bfc6 Mon Sep 17 00:00:00 2001 From: Zhijie Wang <[email protected]> Date: Mon, 4 May 2026 00:21:04 -0700 Subject: [PATCH 1/2] [LifetimeSafety] Track per-field origins for record types --- .../Analyses/LifetimeSafety/FactsGenerator.h | 4 +- .../Analyses/LifetimeSafety/Origins.h | 87 +++-- clang/lib/Analysis/LifetimeSafety/Facts.cpp | 1 + .../LifetimeSafety/FactsGenerator.cpp | 125 ++++--- .../Analysis/LifetimeSafety/LiveOrigins.cpp | 33 +- clang/lib/Analysis/LifetimeSafety/Origins.cpp | 103 +++++- .../warn-lifetime-safety-dangling-field.cpp | 10 +- clang/test/Sema/warn-lifetime-safety.cpp | 318 +++++++++++++++++- 8 files changed, 585 insertions(+), 96 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h index 6248796dbd8e0..d1ddb7cac348f 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/FactsGenerator.h @@ -127,7 +127,9 @@ class FactsGenerator : public ConstStmtVisitor<FactsGenerator> { // (e.g. on the left-hand side of an assignment in the case of a DeclRefExpr). void handleUse(const Expr *E); - void markUseAsWrite(const DeclRefExpr *DRE); + /// Walks the full subtree so origins on the pointee chain and on field + /// children both escape with the returned value. + void emitReturnEscapes(OriginNode *N, const Expr *RetExpr); bool escapesViaReturn(OriginID OID) const; diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h index cacefc8aa62ad..42a2212ebc837 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -21,6 +21,7 @@ #include "clang/Analysis/Analyses/LifetimeSafety/LifetimeStats.h" #include "clang/Analysis/Analyses/LifetimeSafety/Utils.h" #include "clang/Analysis/AnalysisDeclContext.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/raw_ostream.h" namespace clang::lifetimes::internal { @@ -66,47 +67,71 @@ struct Origin { } }; -/// A list of origins representing levels of indirection for pointer-like types. +/// A tree of origins representing the structure of a pointer-like or +/// record type. /// -/// Each node in the list contains an OriginID representing a level of -/// indirection. The list structure captures the multi-level nature of -/// pointer and reference types in the lifetime analysis. +/// Each node carries an OriginID and is connected to children via labeled +/// edges: either a pointee edge (one level of pointer/reference indirection) +/// or a field edge (a named field of a record). Pointer-like types form a +/// pointee chain; record types fan out via field edges. /// /// Examples: -/// - For `int& x`, the list has size 2: +/// - For `int& x`, the chain has length 2: /// * Outer: origin for the reference storage itself (the lvalue `x`) /// * Inner: origin for what `x` refers to /// -/// - For `int* p`, the list has size 2: +/// - For `int* p`, the chain has length 2: /// * Outer: origin for the pointer variable `p` /// * Inner: origin for what `p` points to /// -/// - For `View v` (where View is gsl::Pointer), the list has size 2: +/// - For `View v` (where View is gsl::Pointer), the chain has length 2: /// * Outer: origin for the view object itself /// * Inner: origin for what the view refers to /// -/// - For `int** pp`, the list has size 3: +/// - For `int** pp`, the chain has length 3: /// * Outer: origin for `pp` itself /// * Inner: origin for `*pp` (what `pp` points to) /// * Inner->Inner: origin for `**pp` (what `*pp` points to) /// -/// The list structure enables the analysis to track how loans flow through -/// different levels of indirection when assignments and dereferences occur. -/// -/// TODO: Currently list-shaped (each node has at most one pointee child). -/// Will become tree-shaped once field children are added to support -/// origin trees for records whose fields have origins. +/// The structure enables the analysis to track how loans flow through +/// levels of indirection and across record fields when assignments and +/// dereferences occur. class OriginNode { public: + /// A labeled edge from this node to a child. The label distinguishes how + /// the child is reached: a null `FD` means a pointee edge (one level of + /// pointer/reference indirection); a non-null `FD` means a field edge + /// (the named field of a record). Putting the label on the edge lets + /// one child node play different roles per parent. For example, the subtree + /// for `s`'s `v` field is reached from `s`'s record (FD=v) and from + /// the lvalue outer built for the MemberExpr `s.v` (FD=null). + struct Edge { + const FieldDecl *FD; + OriginNode *Child; + }; + OriginNode(OriginID OID) : OID(OID) {} + OriginID getOriginID() const { return OID; } + + llvm::ArrayRef<Edge> children() const { return Children; } + OriginNode *getPointeeChild() const { - return Children.empty() ? nullptr : Children[0]; + for (const Edge &E : Children) + if (!E.FD) + return E.Child; + return nullptr; } - OriginID getOriginID() const { return OID; } + OriginNode *getFieldChild(const FieldDecl *F) const { + assert(F); + for (const Edge &E : Children) + if (E.FD == F) + return E.Child; + return nullptr; + } - void setChildren(llvm::ArrayRef<OriginNode *> NewChildren) { + void setChildren(llvm::ArrayRef<Edge> NewChildren) { assert(Children.empty() && "children must be set at most once"); Children = NewChildren; } @@ -125,7 +150,7 @@ class OriginNode { private: OriginID OID; - llvm::ArrayRef<OriginNode *> Children; + llvm::ArrayRef<Edge> Children; }; bool doesDeclHaveStorage(const ValueDecl *D); @@ -138,18 +163,19 @@ class OriginManager { /// Gets or creates the OriginNode for a given ValueDecl. /// - /// Creates a list structure mirroring the levels of indirection in the - /// declaration's type (e.g., `int** p` creates list of size 2). + /// Creates a tree structure mirroring the levels of indirection in the + /// declaration's type (e.g., `int* p` creates a chain of length 2). /// /// \returns The OriginNode, or nullptr if the type is not pointer-like. OriginNode *getOrCreateNode(const ValueDecl *D); /// Gets or creates the OriginNode for a given Expr. /// - /// Creates a list based on the expression's type and value category: + /// Creates a tree structure based on the expression's type and value + /// category: /// - Lvalues get an implicit reference level (modeling addressability) /// - Rvalues of non-pointer type return nullptr (no trackable origin) - /// - DeclRefExpr may reuse the underlying declaration's list + /// - DeclRefExpr may reuse the underlying declaration's tree /// /// \returns The OriginNode, or nullptr for non-pointer rvalues. OriginNode *getOrCreateNode(const Expr *E); @@ -183,9 +209,19 @@ class OriginManager { OriginNode *createNode(const Expr *E, QualType QT); void attachPointeeChild(OriginNode *Parent, OriginNode *Pointee); + void attachChildren(OriginNode *Parent, + llvm::ArrayRef<OriginNode::Edge> Children); template <typename T> OriginNode *buildNodeForType(QualType QT, const T *Node); + template <typename T> + OriginNode *buildNodeForTypeImpl(QualType QT, const T *Node, + llvm::SmallPtrSet<const Type *, 4> &Visited, + unsigned FieldDepth); + + /// Whether a record field participates in origin tracking. Plain records + /// only track public fields; lambdas track all fields. + bool isTrackedField(const CXXRecordDecl *RD, const FieldDecl *FD) const; void initializeThisOrigins(const Decl *D); @@ -208,6 +244,13 @@ class OriginManager { /// because of lifetime annotations (currently [[clang::lifetimebound]]) on /// functions that return them. llvm::DenseSet<const Type *> LifetimeAnnotatedOriginTypes; + + /// Field-edge depth limit when building origin trees for record types: + /// - `std::nullopt`: no limit (full field tree). + /// - `0`: disable field tracking (records become single-origin). + /// - `N > 0`: track up to N levels of field edges. + /// Pointee edges are not subject to this limit. + std::optional<size_t> MaxFieldDepth = std::nullopt; }; } // namespace clang::lifetimes::internal diff --git a/clang/lib/Analysis/LifetimeSafety/Facts.cpp b/clang/lib/Analysis/LifetimeSafety/Facts.cpp index d04d3e9202952..25458f6ef9b54 100644 --- a/clang/lib/Analysis/LifetimeSafety/Facts.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Facts.cpp @@ -82,6 +82,7 @@ void UseFact::dump(llvm::raw_ostream &OS, const LoanManager &, OS << "Use ("; size_t NumUsedOrigins = getUsedOrigins()->getLength(); size_t I = 0; + // TODO: pointee-chain only; extend to field children. for (const OriginNode *Cur = getUsedOrigins(); Cur; Cur = Cur->getPointeeChild(), ++I) { OM.dump(Cur->getOriginID(), OS); diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 335c3ae970717..9b1c3ac3a4c41 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -49,9 +49,13 @@ bool FactsGenerator::hasOrigins(const Expr *E) const { /// Propagates origin information from Src to Dst through all levels of /// indirection, creating OriginFlowFacts at each level. /// -/// This function enforces a critical type-safety invariant: both lists must -/// have the same shape (same depth/structure). This invariant ensures that -/// origins flow only between compatible types during expression evaluation. +/// This function enforces a critical type-safety invariant: both trees +/// must have the same pointee-chain depth, and field children are +/// matched by `FieldDecl`. This invariant ensures that origins flow only +/// between compatible types during expression evaluation. Field pairs +/// found on both sides recurse; unmatched fields are skipped, which is +/// exercised by `CK_DerivedToBase` flows where Base's and Derived's +/// trees carry distinct direct-field FDs. /// /// Examples: /// - `int* p = &x;` flows origins from `&x` (depth 1) to `p` (depth 1) @@ -59,17 +63,24 @@ bool FactsGenerator::hasOrigins(const Expr *E) const { /// * Level 1: pp <- p's address /// * Level 2: (*pp) <- what p points to (i.e., &x) /// - `View v = obj;` flows origins from `obj` (depth 1) to `v` (depth 1) +/// - `S s2 = s;` flows the top-level origin and recursively flows each +/// matching `FieldDecl` subtree, so loans on `s.v.inner` propagate to +/// `s2.v.inner`. void FactsGenerator::flow(OriginNode *Dst, OriginNode *Src, bool Kill) { if (!Dst) return; assert(Src && "Dst is non-null but Src is null. List must have the same length"); assert(Dst->getLength() == Src->getLength() && - "Lists must have the same length"); + "Pointee chains must have the same length"); while (Dst && Src) { CurrentBlockFacts.push_back(FactMgr.createFact<OriginFlowFact>( Dst->getOriginID(), Src->getOriginID(), Kill)); + for (const OriginNode::Edge &E : Dst->children()) + if (E.FD) + if (OriginNode *SrcF = Src->getFieldChild(E.FD)) + flow(E.Child, SrcF, Kill); Dst = Dst->getPointeeChild(); Src = Src->getPointeeChild(); } @@ -261,11 +272,15 @@ void FactsGenerator::VisitCXXMemberCallExpr(const CXXMemberCallExpr *MCE) { } void FactsGenerator::VisitMemberExpr(const MemberExpr *ME) { - auto *MD = ME->getMemberDecl(); - if (isa<FieldDecl>(MD) && doesDeclHaveStorage(MD)) { - assert(ME->isGLValue() && "Field member should be GL value"); - OriginNode *Dst = getOriginNode(*ME); - assert(Dst && "Field member should have an origin list as it is GL value"); + auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()); + if (!FD) + return; + + assert(ME->isGLValue() && "Field member should be GL value"); + OriginNode *Dst = getOriginNode(*ME); + assert(Dst && "Field member should have an origin list as it is GL value"); + + if (doesDeclHaveStorage(FD)) { OriginNode *Src = getOriginNode(*ME->getBase()); assert(Src && "Base expression should be a pointer/reference type"); // The field's glvalue (outermost origin) holds the same loans as the base @@ -274,6 +289,21 @@ void FactsGenerator::VisitMemberExpr(const MemberExpr *ME) { Dst->getOriginID(), Src->getOriginID(), /*Kill=*/true)); } + + // Narrow the UseFact's liveness coverage to the accessed field's + // subtree. + // + // E.g., for `(void)s.inner`, without narrowing, the UseFact at `s` + // would keep `s.v`'s subtree live and falsely flag a UAF when a loan + // held by `s.v` has already expired. + if (UseFact *UF = UseFacts.lookup(ME->getBase())) { + assert(!UseFacts.contains(ME) && "ME already has a UseFact"); + OriginNode *NewUsedOrigins = + doesDeclHaveStorage(FD) ? Dst->getPointeeChild() : Dst; + UF->setUsedOrigins(NewUsedOrigins); + UseFacts[ME] = UF; + UseFacts.erase(ME->getBase()); + } } void FactsGenerator::VisitCallExpr(const CallExpr *CE) { @@ -365,31 +395,42 @@ void FactsGenerator::VisitUnaryOperator(const UnaryOperator *UO) { } } +void FactsGenerator::emitReturnEscapes(OriginNode *N, const Expr *RetExpr) { + if (!N) + return; + EscapesInCurrentBlock.push_back( + FactMgr.createFact<ReturnEscapeFact>(N->getOriginID(), RetExpr)); + for (const OriginNode::Edge &E : N->children()) + emitReturnEscapes(E.Child, RetExpr); +} + void FactsGenerator::VisitReturnStmt(const ReturnStmt *RS) { - if (const Expr *RetExpr = RS->getRetValue()) { - if (OriginNode *Node = getOriginNode(*RetExpr)) - for (OriginNode *L = Node; L != nullptr; L = L->getPointeeChild()) - EscapesInCurrentBlock.push_back( - FactMgr.createFact<ReturnEscapeFact>(L->getOriginID(), RetExpr)); - } + if (const Expr *RetExpr = RS->getRetValue()) + emitReturnEscapes(getOriginNode(*RetExpr), RetExpr); } void FactsGenerator::handleAssignment(const Expr *LHSExpr, const Expr *RHSExpr) { LHSExpr = LHSExpr->IgnoreParenImpCasts(); OriginNode *LHSNode = nullptr; + QualType LHSType; + UseFact *LHSUseFact = nullptr; if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) { LHSNode = getOriginNode(*DRE_LHS); assert(LHSNode && "LHS is a DRE and should have an origin list"); - } - // Handle assignment to member fields (e.g., `this->view = s` or `view = s`). - // This enables detection of dangling fields when local values escape to - // fields. - if (const auto *ME_LHS = dyn_cast<MemberExpr>(LHSExpr)) { + LHSType = DRE_LHS->getDecl()->getType(); + LHSUseFact = UseFacts.lookup(DRE_LHS); + } else if (const auto *ME_LHS = dyn_cast<MemberExpr>(LHSExpr)) { + // Handle assignment to member fields (e.g., `this->view = s` or `view = + // s`). This enables detection of dangling fields when local values escape + // to fields. LHSNode = getOriginNode(*ME_LHS); assert(LHSNode && "LHS is a MemberExpr and should have an origin list"); + LHSType = ME_LHS->getMemberDecl()->getType(); + LHSUseFact = UseFacts.lookup(ME_LHS); } + if (!LHSNode) return; OriginNode *RHSNode = getOriginNode(*RHSExpr); @@ -400,28 +441,26 @@ void FactsGenerator::handleAssignment(const Expr *LHSExpr, // assigned. RHSNode = getRValueOrigins(RHSExpr, RHSNode); - if (const auto *DRE_LHS = dyn_cast<DeclRefExpr>(LHSExpr)) { - QualType QT = DRE_LHS->getDecl()->getType(); - if (QT->isReferenceType()) { - if (hasOrigins(QT->getPointeeType())) { + if (LHSUseFact) { + if (LHSType->isReferenceType()) { + if (hasOrigins(LHSType->getPointeeType())) { // Writing through a reference uses the binding but overwrites the // pointee. Model this as a Read of the outer origin (keeping the // binding live) and a Write of the inner origins (killing the pointee's // liveness). - if (UseFact *UF = UseFacts.lookup(DRE_LHS)) { - const OriginNode *FullNode = UF->getUsedOrigins(); - assert(FullNode); - UF->setUsedOrigins(FactMgr.getOriginMgr().createSingleOriginNode( - FullNode->getOriginID())); - if (const OriginNode *InnerNode = FullNode->getPointeeChild()) { - UseFact *WriteUF = FactMgr.createFact<UseFact>(DRE_LHS, InnerNode); - WriteUF->markAsWritten(); - CurrentBlockFacts.push_back(WriteUF); - } + const OriginNode *FullNode = LHSUseFact->getUsedOrigins(); + assert(FullNode); + LHSUseFact->setUsedOrigins( + FactMgr.getOriginMgr().createSingleOriginNode( + FullNode->getOriginID())); + if (const OriginNode *InnerNode = FullNode->getPointeeChild()) { + UseFact *WriteUF = FactMgr.createFact<UseFact>(LHSExpr, InnerNode); + WriteUF->markAsWritten(); + CurrentBlockFacts.push_back(WriteUF); } } } else - markUseAsWrite(DRE_LHS); + LHSUseFact->markAsWritten(); } if (!RHSNode) { // RHS has no tracked origins (e.g., assigning a callable without origins @@ -563,9 +602,14 @@ void FactsGenerator::VisitCXXFunctionalCastExpr( void FactsGenerator::VisitInitListExpr(const InitListExpr *ILE) { if (!hasOrigins(ILE)) return; - // For list initialization with a single element, like `View{...}`, the - // origin of the list itself is the origin of its single element. - if (ILE->getNumInits() == 1) + // For list initialization with a single element of the same type, like + // `View{other}`, the origin of the list itself is the origin of its single + // element. + // + // TODO: Handle aggregate (record/array) list initialization. + if (ILE->getNumInits() == 1 && + ILE->getType().getCanonicalType() == + ILE->getInit(0)->getType().getCanonicalType()) killAndFlowOrigin(*ILE, *ILE->getInit(0)); } @@ -1006,11 +1050,6 @@ void FactsGenerator::handleUse(const Expr *E) { } } -void FactsGenerator::markUseAsWrite(const DeclRefExpr *DRE) { - if (UseFacts.contains(DRE)) - UseFacts[DRE]->markAsWritten(); -} - // Creates an IssueFact for a new placeholder loan for each pointer or reference // parameter at the function's entry. llvm::SmallVector<Fact *> FactsGenerator::issuePlaceholderLoans() { diff --git a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp index a23d7f224b4bc..c1f82796a601e 100644 --- a/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/LiveOrigins.cpp @@ -127,21 +127,30 @@ class AnalysisImpl /// A read operation makes the origin live with definite confidence, as it /// dominates this program point. A write operation kills the liveness of /// the origin since it overwrites the value. + /// + /// Walks the full subtree so loans held by any descendant (pointee + /// chain or field child) become visible at the use site. Lattice transfer(Lattice In, const UseFact &UF) { + return transferUseSubtree(In, UF, UF.getUsedOrigins()); + } + + Lattice transferUseSubtree(Lattice In, const UseFact &UF, + const OriginNode *Cur) { + if (!Cur) + return In; + OriginID OID = Cur->getOriginID(); Lattice Out = In; - for (const OriginNode *Cur = UF.getUsedOrigins(); Cur; - Cur = Cur->getPointeeChild()) { - OriginID OID = Cur->getOriginID(); - // Write kills liveness. - if (UF.isWritten()) { - Out = Lattice(Factory.remove(Out.LiveOrigins, OID)); - } else { - // Read makes origin live with definite confidence (dominates this - // point). - Out = Lattice(Factory.add(Out.LiveOrigins, OID, - LivenessInfo(&UF, LivenessKind::Must))); - } + // Write kills liveness. + if (UF.isWritten()) { + Out = Lattice(Factory.remove(Out.LiveOrigins, OID)); + } else { + // Read makes origin live with definite confidence (dominates this + // point). + Out = Lattice(Factory.add(Out.LiveOrigins, OID, + LivenessInfo(&UF, LivenessKind::Must))); } + for (const OriginNode::Edge &E : Cur->children()) + Out = transferUseSubtree(Out, UF, E.Child); return Out; } diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp index 41d3adaaae59f..bbfea9cd0dff2 100644 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -112,16 +112,28 @@ bool OriginManager::hasOrigins(QualType QT) const { // stored lambda's origins. if (isStdCallableWrapperType(RD)) return true; - // TODO: Limit to lambdas for now. This will be extended to user-defined - // structs with pointer-like fields. - if (!RD->isLambda()) + // A lambda has origins when any capture has a tracked type; the lambda + // itself is tracked as a single origin. + if (RD->isLambda()) { + for (const auto *FD : RD->fields()) + if (hasOrigins(FD->getType())) + return true; + return false; + } + // TODO: Unions are not tracked. + if (RD->isUnion()) return false; for (const auto *FD : RD->fields()) - if (hasOrigins(FD->getType())) + if (isTrackedField(RD, FD)) return true; return false; } +bool OriginManager::isTrackedField(const CXXRecordDecl *RD, + const FieldDecl *FD) const { + return FD->getAccess() == AS_public && hasOrigins(FD->getType()); +} + /// Determines if an expression has origins that need to be tracked. /// /// An expression has origins if: @@ -193,13 +205,42 @@ OriginNode *OriginManager::createSingleOriginNode(OriginID OID) { void OriginManager::attachPointeeChild(OriginNode *Parent, OriginNode *Pointee) { assert(Pointee && "pointee subtree must be non-null"); - Parent->setChildren( - {new (Allocator.Allocate<OriginNode *>()) OriginNode *(Pointee), 1}); + auto *E = new (Allocator.Allocate<OriginNode::Edge>()) + OriginNode::Edge{nullptr, Pointee}; + Parent->setChildren({E, 1}); +} + +void OriginManager::attachChildren(OriginNode *Parent, + llvm::ArrayRef<OriginNode::Edge> Children) { + Parent->setChildren(Children.copy(Allocator)); } template <typename T> OriginNode *OriginManager::buildNodeForType(QualType QT, const T *Node) { - assert(hasOrigins(QT) && "buildNodeForType called for non-pointer type"); + llvm::SmallPtrSet<const Type *, 4> Visited; + return buildNodeForTypeImpl(QT, Node, Visited, 0); +} + +template <typename T> +OriginNode * +OriginManager::buildNodeForTypeImpl(QualType QT, const T *Node, + llvm::SmallPtrSet<const Type *, 4> &Visited, + unsigned FieldDepth) { + assert(hasOrigins(QT) && "buildNodeForType called for type without origins"); + + const auto *RD = QT->getAsCXXRecordDecl(); + const Type *Canonical = QT.getCanonicalType().getTypePtr(); + // Cycle cut: only records enter Visited; re-entering one returns a + // leaf to stop descending further. Loans landing on the cut leaf are + // dropped (e.g., through `n->next->next`). + // + // Pointer/reference types stay transparent: including them in Visited + // would make the same record's shape depend on the entry path. E.g., + // Node's Sub_next would have length 2 from a Node start but length 1 + // from a Node* start, breaking flow's length assertion. + if (RD && !Visited.insert(Canonical).second) + return createNode(Node, QT); + OriginNode *Head = createNode(Node, QT); if (QT->isPointerOrReferenceType()) { @@ -207,8 +248,28 @@ OriginNode *OriginManager::buildNodeForType(QualType QT, const T *Node) { // We recurse if the pointee type is pointer-like, to build the next // level in the origin tree. E.g., for T*& / View&. if (hasOrigins(PointeeTy)) - attachPointeeChild(Head, buildNodeForType(PointeeTy, Node)); + attachPointeeChild( + Head, buildNodeForTypeImpl(PointeeTy, Node, Visited, FieldDepth)); + } else if (RD) { + bool WithinFieldDepthLimit = !MaxFieldDepth || FieldDepth < *MaxFieldDepth; + bool shouldExpandFields = + !(isGslPointerType(QT) || isStdCallableWrapperType(RD) || + LifetimeAnnotatedOriginTypes.contains(Canonical) || RD->isLambda()) && + WithinFieldDepthLimit; + if (shouldExpandFields) { + SmallVector<OriginNode::Edge, 4> FieldChildren; + for (const FieldDecl *F : RD->fields()) + if (isTrackedField(RD, F)) { + OriginNode *Sub = + buildNodeForTypeImpl(F->getType(), Node, Visited, FieldDepth + 1); + FieldChildren.push_back({F, Sub}); + } + attachChildren(Head, FieldChildren); + } } + + if (RD) + Visited.erase(Canonical); return Head; } @@ -273,6 +334,32 @@ OriginNode *OriginManager::getOrCreateNode(const Expr *E) { return ExprToNode[E] = Head; } + // For a MemberExpr whose base is not `this` (handled above), look up the + // field child in the base's per-instance origin tree. This makes loans + // flowing into one occurrence of `s.v` visible at later occurrences. + if (auto *ME = dyn_cast<MemberExpr>(E)) + if (auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) + // E.g. `p->v` walks two hops: DRE outer, then pointer indirection. + for (OriginNode *N = + getOrCreateNode(ME->getBase()->IgnoreParenImpCasts()); + N; N = N->getPointeeChild()) + if (OriginNode *Sub = N->getFieldChild(FD)) { + // For non-reference fields (e.g., `View v;` in a record), the + // MemberExpr `s.v` is an lvalue (addressable) that can be + // borrowed, so we create an outer origin for the lvalue itself, + // with the pointee being the field's shared subtree. `&s.v` borrows + // the storage of the v-slot in s, not what v refers to. + if (doesDeclHaveStorage(FD)) { + OriginNode *Outer = createNode(E, QualType{}); + attachPointeeChild(Outer, Sub); + return ExprToNode[E] = Outer; + } + // For reference-typed fields (e.g., `int& r;` in a record) which + // have no storage, the MemberExpr `s.r` directly reuses the + // field's subtree. + return ExprToNode[E] = Sub; + } + // If E is an lvalue , it refers to storage. We model this storage as the // first level of origin list, as if it were a reference, because l-values are // addressable. diff --git a/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp b/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp index 5a4de105f217d..ac101f8e9bc13 100644 --- a/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp +++ b/clang/test/Sema/warn-lifetime-safety-dangling-field.cpp @@ -162,27 +162,25 @@ struct MemberSetters { } }; -// FIXME: Detect escape to field of field. struct IndirectEscape{ struct { const char *p; - } b; + } b; // expected-note {{this field dangles}} void foo() { std::string s; - b.p = s.data(); + b.p = s.data(); // expected-warning {{address of stack memory escapes to a field}} } }; -// FIXME: Detect escape to field of field. struct IndirectEscape2 { - struct { + struct { // expected-note {{this field dangles}} const char *p; }; void foo() { std::string s; - p = s.data(); + p = s.data(); // expected-warning {{address of stack memory escapes to a field}} } }; diff --git a/clang/test/Sema/warn-lifetime-safety.cpp b/clang/test/Sema/warn-lifetime-safety.cpp index 30b450c333fbd..2b0ac87d1a11d 100644 --- a/clang/test/Sema/warn-lifetime-safety.cpp +++ b/clang/test/Sema/warn-lifetime-safety.cpp @@ -3124,14 +3124,13 @@ struct T { } }; -// FIXME: false-negative void foo() { S s; { int num; - s.p_ = # // does not warn - } - s.bar(); + s.p_ = # // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + s.bar(); // expected-note {{later used here}} s.p_ = &GLOBAL_INT; } @@ -3284,3 +3283,314 @@ void assign_non_capturing_to_function_ref(function_ref &r) { } } // namespace GH126600 + +namespace tree_origin { + +struct Inner { + View v; + View w; +}; + +struct S { + View v; + Inner inner; + int x; +}; + +void struct_field() { + S s; + { + MyObj obj; + s.v = obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + s.v.use(); // expected-note {{later used here}} +} + +void struct_field2() { + S s; + { + MyObj obj; + s.inner.v = obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)s.inner; // expected-note {{later used here}} +} + +void struct_field3() { + S s; + { + MyObj obj; + s.inner.v = obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + s.inner.v.use(); // expected-note {{later used here}} +} + +void struct_field_safe() { + S s; + { + MyObj obj; + s.v = obj; + } + (void)s.inner; +} + +void struct_field_safe2() { + S s; + { + MyObj obj; + s.inner.v = obj; + } + (void)s.inner.w; +} + +S return_struct() { + S s; + MyObj local; + s.v = local; // expected-warning {{address of stack memory is returned later}} + return s; // expected-note {{returned here}} +} + +S return_struct_assign() { + S s, s2; + MyObj local; + s.v = local; // expected-warning {{address of stack memory is returned later}} + s2 = s; + return s2; // expected-note {{returned here}} +} + +View *return_field_addr() { + S s; + View* v = &s.v; // expected-warning {{address of stack memory is returned later}} + return v; // expected-note {{returned here}} +} + +void struct_field_in_loop(int n) { + S s; + for (int i = 0; i < n; i++) { + MyObj obj; + s.v = obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)s; // expected-note {{later used here}} +} + +void struct_field_in_loop_safe(int n) { + for (int i = 0; i < n; i++) { + MyObj obj; + S s; + s.v = obj; + } +} + +void struct_field_in_loop_safe2(int n) { + S s; + for (int i = 0; i < n; i++) { + MyObj obj; + s.v = obj; + } +} + +void struct_field_read_in_loop_safe(int n) { + for (int i = 0; i < n; i++) { + S s; + (void)s.v; + } +} + +void struct_int_field_read_in_loop_safe(int n) { + for (int i = 0; i < n; i++) { + S s; + (void)s.x; + } +} + +void field_pointee_propagate_in_loop_safe(int n) { + View outer; + MyObj obj; + for (int i = 0; i < n; i++) { + S local; + local.v = obj; + outer = local.v; + } + outer.use(); +} + +struct PtrRef { + MyObj *&ref; +}; + +void ref_field_dangle() { + MyObj *p; + PtrRef h{p}; + { + MyObj temp; + h.ref = &temp; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)*h.ref; // expected-note {{later used here}} +} + +void ref_field_dangle_then_rescue() { + MyObj safe; + MyObj *p; + PtrRef h{p}; + { + MyObj temp; + h.ref = &temp; + } + h.ref = &safe; + (void)*h.ref; +} + +struct Node { + Node *next; + View v; +}; + +void self_referential_struct_field() { + Node n; + { + MyObj obj; + n.v = obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + n.v.use(); // expected-note {{later used here}} +} + +void self_referential_pointer_assign() { + Node n; + { + Node child; + n.next = &child; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)n; // expected-note {{later used here}} +} + +struct SelfRefMultiPtr { + SelfRefMultiPtr *p1; + SelfRefMultiPtr **p2; +}; + +void self_referential_mixed_pointer_depth() { + SelfRefMultiPtr s; + { + SelfRefMultiPtr *p; + s.p2 = &p; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)s; // expected-note {{later used here}} +} + +// FIXME: False negative. The cycle-cut leaf at `n.next`'s pointee has +// no field edges, so the loan on `n.next->v` lands on an orphan tree +// disconnected from n. +void self_referential_pointer_field_write() { + Node n, child; + n.next = &child; + { + MyObj local; + n.next->v = local; + } + (void)n; // Should warn. +} + +struct DerivedView : S { + View v2; +}; + +void derived_to_base_upcast() { + DerivedView d; + // CK_DerivedToBase: Src and Dst children sets diverge in flow (Base and + // Derived have different direct fields), so flow must skip unmatched + // fields without crashing. + S& b = d; + (void)b; +} + +// FIXME: False positive. +void derived_field_safe() { + DerivedView d; + { + MyObj obj; + d.v2 = obj; // expected-warning {{object whose reference is captured does not live long enough}} + } // expected-note {{destroyed here}} + (void)d.v; // expected-note {{later used here}} +} + +class PrivateHolder { +private: + View v; + friend PrivateHolder return_private_holder(); +}; + +// FIXME: False negative. Don't track origins for private fields for now. +PrivateHolder return_private_holder() { + PrivateHolder h; + MyObj local; + h.v = local; + return h; // Should warn. +} + +struct UnionStruct { + union { + int* p; + float f; + }; +}; + +// FIXME: False negative. Don't track origins for union for now. +UnionStruct union_field() { + UnionStruct u; + int x; + u.p = &x; + return u; // Should warn. +} + +struct StoreS { + View v; + void store(const MyObj& obj) { v = obj; } +}; + +// FIXME: False negative. Two pieces missing: +// 1. Infer `lifetime_capture_by(this)` on non-ctor methods that +// capture a parameter into a field. +// 2. Consume the attribute in `handleFunctionCall` and flow the +// captured argument's origin into the corresponding target at +// the call site. +StoreS return_after_this_set() { + StoreS s; + MyObj local; + s.store(local); + return s; // Should warn. +} + +struct BaseS { View v; }; +struct DerivedS : BaseS {}; + +// FIXME: False negative. `d.v` accesses an inherited field. Only track +// direct fields for now, so DerivedS's tree has no edge for `v` and the +// loan does not propagate. +DerivedS inherited_field() { + DerivedS d; + MyObj local; + d.v = local; + return d; // Should warn. +} + +// FIXME: False negative. `p->v1 = local` deposits into p's origin tree, +// which is independent of s's tree after the initial `p = &s` flow. +// Requires alias analysis. +S field_write_via_pointer() { + S s; + MyObj local; + S *p = &s; + p->v = local; + return s; // Should warn. +} + +struct ViewPtrHolder { View *p; }; + +// FIXME: False negative. Aggregate (record/array) list initialization is +// not handled, so the loan from `&v` does not flow into `h.p`. +View *struct_init_single_field() { + View v; + ViewPtrHolder h{&v}; + return h.p; // Should warn. +} + +} // namespace tree_origin >From 1693bc2282c978701dd3fe801b3d0dd0399a8da0 Mon Sep 17 00:00:00 2001 From: Zhijie Wang <[email protected]> Date: Tue, 12 May 2026 23:12:48 -0700 Subject: [PATCH 2/2] only track origins for accessed fields --- .../Analyses/LifetimeSafety/Origins.h | 11 ++++++ .../LifetimeSafety/FactsGenerator.cpp | 7 ++-- clang/lib/Analysis/LifetimeSafety/Origins.cpp | 39 ++++++++++++++++++- 3 files changed, 52 insertions(+), 5 deletions(-) diff --git a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h index 42a2212ebc837..458a5dddb9a95 100644 --- a/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h +++ b/clang/include/clang/Analysis/Analyses/LifetimeSafety/Origins.h @@ -197,6 +197,10 @@ class OriginManager { bool hasOrigins(QualType QT) const; bool hasOrigins(const Expr *E) const; + bool isAccessedField(const FieldDecl *FD) const { + return AccessedFields.contains(FD); + } + void dump(OriginID OID, llvm::raw_ostream &OS) const; /// Collects statistics about expressions that lack associated origins. @@ -231,6 +235,10 @@ class OriginManager { void collectLifetimeAnnotatedOriginTypes(const AnalysisDeclContext &AC); void registerLifetimeAnnotatedOriginType(QualType QT); + /// Pre-scans the function body (and constructor init lists) to discover + /// the fields it accesses; the rest are excluded from origin tracking. + void collectAccessedFields(const AnalysisDeclContext &AC); + ASTContext &AST; OriginID NextOriginID{0}; /// TODO(opt): Profile and evaluate the usefulness of small buffer @@ -244,6 +252,9 @@ class OriginManager { /// because of lifetime annotations (currently [[clang::lifetimebound]]) on /// functions that return them. llvm::DenseSet<const Type *> LifetimeAnnotatedOriginTypes; + /// Fields accessed in the function body (or constructor init lists). + /// Fields outside this set are excluded from origin tracking. + llvm::SmallPtrSet<const FieldDecl *, 8> AccessedFields; /// Field-edge depth limit when building origin trees for record types: /// - `std::nullopt`: no limit (full field tree). diff --git a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp index 9b1c3ac3a4c41..a91d8ba1e131a 100644 --- a/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp +++ b/clang/lib/Analysis/LifetimeSafety/FactsGenerator.cpp @@ -895,9 +895,10 @@ void FactsGenerator::handleImplicitObjectFieldUses(const Expr *Call, const auto UseFields = [&](const CXXRecordDecl *RD) { for (const auto *Field : RD->fields()) - if (auto *FieldNode = getOriginNode(*Field)) - CurrentBlockFacts.push_back( - FactMgr.createFact<UseFact>(Call, FieldNode)); + if (FactMgr.getOriginMgr().isAccessedField(Field)) + if (auto *FieldNode = getOriginNode(*Field)) + CurrentBlockFacts.push_back( + FactMgr.createFact<UseFact>(Call, FieldNode)); }; UseFields(ClassDecl); diff --git a/clang/lib/Analysis/LifetimeSafety/Origins.cpp b/clang/lib/Analysis/LifetimeSafety/Origins.cpp index bbfea9cd0dff2..0e62a535f9825 100644 --- a/clang/lib/Analysis/LifetimeSafety/Origins.cpp +++ b/clang/lib/Analysis/LifetimeSafety/Origins.cpp @@ -98,6 +98,25 @@ class LifetimeAnnotatedOriginTypeCollector } }; +class AccessedFieldCollector + : public RecursiveASTVisitor<AccessedFieldCollector> { +public: + bool VisitMemberExpr(const MemberExpr *ME) { + if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) + AccessedFields.insert(FD); + return true; + } + + bool shouldVisitTemplateInstantiations() const { return true; } + + const llvm::SmallPtrSet<const FieldDecl *, 8> &getAccessedFields() const { + return AccessedFields; + } + +private: + llvm::SmallPtrSet<const FieldDecl *, 8> AccessedFields; +}; + } // namespace bool OriginManager::hasOrigins(QualType QT) const { @@ -124,14 +143,15 @@ bool OriginManager::hasOrigins(QualType QT) const { if (RD->isUnion()) return false; for (const auto *FD : RD->fields()) - if (isTrackedField(RD, FD)) + if (FD->getAccess() == AS_public && hasOrigins(FD->getType())) return true; return false; } bool OriginManager::isTrackedField(const CXXRecordDecl *RD, const FieldDecl *FD) const { - return FD->getAccess() == AS_public && hasOrigins(FD->getType()); + return FD->getAccess() == AS_public && hasOrigins(FD->getType()) && + isAccessedField(FD); } /// Determines if an expression has origins that need to be tracked. @@ -172,6 +192,7 @@ bool doesDeclHaveStorage(const ValueDecl *D) { OriginManager::OriginManager(const AnalysisDeclContext &AC) : AST(AC.getASTContext()) { collectLifetimeAnnotatedOriginTypes(AC); + collectAccessedFields(AC); initializeThisOrigins(AC.getDecl()); } @@ -417,4 +438,18 @@ void OriginManager::registerLifetimeAnnotatedOriginType(QualType QT) { LifetimeAnnotatedOriginTypes.insert(QT.getCanonicalType().getTypePtr()); } +void OriginManager::collectAccessedFields(const AnalysisDeclContext &AC) { + AccessedFieldCollector Collector; + if (Stmt *Body = AC.getBody()) + Collector.TraverseStmt(Body); + if (const auto *CD = dyn_cast<CXXConstructorDecl>(AC.getDecl())) + for (const auto *Init : CD->inits()) { + if (const FieldDecl *FD = Init->getAnyMember()) + AccessedFields.insert(FD); + if (Expr *InitE = Init->getInit()) + Collector.TraverseStmt(InitE); + } + AccessedFields.insert_range(Collector.getAccessedFields()); +} + } // namespace clang::lifetimes::internal _______________________________________________ llvm-branch-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
