https://github.com/ziqingluo-90 updated https://github.com/llvm/llvm-project/pull/205430
>From 0ca87a1c6d60ebaa9369283a8ad21450e209f7e3 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Tue, 23 Jun 2026 13:17:16 -0700 Subject: [PATCH 1/6] [clang-index][USR] GenLoc prints file entry at most once, allow repeated offsets GenLoc previously printed the source location at most once per USR, gated by a member flag toggled on the first call. During the recursive visit, if both an outer and an inner decl needed to print the location, only the outer one was printed. When the outer decl did not need the offset, no offset was ever printed. For example, the USR of `Holder<decltype([]{})>::method` depends on the location of the type of the lambda but the outer decl prints the file entry only, which disables offset printing. Change the logic so the file-entry part of the location is printed at most once (it must be identical), while offsets of sub-decl locations may be printed multiple times. --- .../UnifiedSymbolResolution/USRGeneration.cpp | 45 ++++++++++++++----- clang/unittests/Index/IndexTests.cpp | 34 ++++++++++++++ 2 files changed, 69 insertions(+), 10 deletions(-) diff --git a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp index c4788dd6917f2..7c254e0c39c32 100644 --- a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp +++ b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp @@ -24,6 +24,22 @@ using namespace clang::index; // USR generation. //===----------------------------------------------------------------------===// +/// Print only the offset part of \p Loc +/// \returns true on error. +static bool printLocOffset(llvm::raw_ostream &OS, SourceLocation Loc, + const SourceManager &SM) { + if (Loc.isInvalid()) + return true; + Loc = SM.getExpansionLoc(Loc); + // Use the offest into the FileID to represent the location. Using + // a line/column can cause us to look back at the original source file, + // which is expensive. + OS << '@' << SM.getDecomposedLoc(Loc).second; + return false; +} + +/// Print \p Loc including both the file and offset, if \p IncludeOffset is +/// true. Print only the file of \p Loc otherwise. /// \returns true on error. static bool printLoc(llvm::raw_ostream &OS, SourceLocation Loc, const SourceManager &SM, bool IncludeOffset) { @@ -64,7 +80,11 @@ class USRGenerator : public ConstDeclVisitor<USRGenerator> { ASTContext *Context; const LangOptions &LangOpts; bool IgnoreResults = false; - bool generatedLoc = false; + // The flag below ensures that, when a source location needs to be printed, + // the file entry is printed at most once during the recursive visit. The + // offset part may be printed multiple times, since sub-decls along the + // path may each need a distinct location to disambiguate them. + bool GeneratedFileEntry = false; llvm::DenseMap<const Type *, unsigned> TypeSubstitutions; @@ -636,23 +656,28 @@ void USRGenerator::GenExtSymbolContainer(const NamedDecl *D) { } bool USRGenerator::GenLoc(const Decl *D, bool IncludeOffset) { - if (generatedLoc) - return IgnoreResults; - generatedLoc = true; - // Guard against null declarations in invalid code. if (!D) { IgnoreResults = true; return true; } + // Do nothing if file entry has been printed and no need to print the offset. + if (GeneratedFileEntry && !IncludeOffset) + return IgnoreResults; + + bool PrintErr = false; // Use the location of canonical decl. D = D->getCanonicalDecl(); - - IgnoreResults = - IgnoreResults || printLoc(Out, D->getBeginLoc(), - Context->getSourceManager(), IncludeOffset); - + if (!GeneratedFileEntry) { + GeneratedFileEntry = true; + PrintErr = printLoc(Out, D->getBeginLoc(), Context->getSourceManager(), + IncludeOffset); + } else { + PrintErr = + printLocOffset(Out, D->getBeginLoc(), Context->getSourceManager()); + } + IgnoreResults = IgnoreResults || PrintErr; return IgnoreResults; } diff --git a/clang/unittests/Index/IndexTests.cpp b/clang/unittests/Index/IndexTests.cpp index 6df4b577d98a0..90d47657fc5cc 100644 --- a/clang/unittests/Index/IndexTests.cpp +++ b/clang/unittests/Index/IndexTests.cpp @@ -9,6 +9,7 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/RecursiveASTVisitor.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/CompilerInstance.h" @@ -18,6 +19,7 @@ #include "clang/Index/IndexingAction.h" #include "clang/Lex/Preprocessor.h" #include "clang/Tooling/Tooling.h" +#include "clang/UnifiedSymbolResolution/USRGeneration.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/VirtualFileSystem.h" #include "gmock/gmock.h" @@ -451,6 +453,38 @@ TEST(IndexTest, ReadWriteRoles) { WrittenAt(Position(6, 17)))))); } +// Two `Holder<decltype([]{})>` instantiations have distinct closure types, +// therefore they should have distinct USRs. +TEST(USRTest, NestedLambdaTagsInTemplateArgGetDistinctUSRs) { + auto AST = tooling::buildASTFromCodeWithArgs( + R"cpp( + template <class T> struct Holder { void reset(T*) {} }; + void caller() { + Holder<decltype([]{})>().reset(nullptr); + Holder<decltype([]{})>().reset(nullptr); + } + )cpp", + {"-std=c++20"}); + ASSERT_TRUE(AST); + + struct Visitor : RecursiveASTVisitor<Visitor> { + std::vector<const ClassTemplateSpecializationDecl *> Specs; + bool VisitCXXMemberCallExpr(CXXMemberCallExpr *Call) { + if (auto *M = Call->getMethodDecl()) + if (auto *S = dyn_cast<ClassTemplateSpecializationDecl>(M->getParent())) + Specs.push_back(S); + return true; + } + } V; + V.TraverseAST(AST->getASTContext()); + ASSERT_EQ(V.Specs.size(), 2u); + + llvm::SmallString<128> U0, U1; + ASSERT_FALSE(generateUSRForDecl(V.Specs[0], U0)); + ASSERT_FALSE(generateUSRForDecl(V.Specs[1], U1)); + EXPECT_NE(U0, U1) << "U0=" << U0.str() << " U1=" << U1.str(); +} + } // namespace } // namespace index } // namespace clang >From 8e202846e62c441f14d1d443e7ec67508862093b Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Wed, 24 Jun 2026 16:23:53 -0700 Subject: [PATCH 2/6] address comments --- .../UnifiedSymbolResolution/USRGeneration.cpp | 14 ++-- clang/unittests/Index/IndexTests.cpp | 78 +++++++++++++++---- 2 files changed, 71 insertions(+), 21 deletions(-) diff --git a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp index 7c254e0c39c32..a3885640427a5 100644 --- a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp +++ b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp @@ -81,10 +81,10 @@ class USRGenerator : public ConstDeclVisitor<USRGenerator> { const LangOptions &LangOpts; bool IgnoreResults = false; // The flag below ensures that, when a source location needs to be printed, - // the file entry is printed at most once during the recursive visit. The + // the filename is printed at most once during the recursive visit. The // offset part may be printed multiple times, since sub-decls along the - // path may each need a distinct location to disambiguate them. - bool GeneratedFileEntry = false; + // visit may each need a distinct location to disambiguate them. + bool GeneratedFilename = false; llvm::DenseMap<const Type *, unsigned> TypeSubstitutions; @@ -661,16 +661,16 @@ bool USRGenerator::GenLoc(const Decl *D, bool IncludeOffset) { IgnoreResults = true; return true; } - // Do nothing if file entry has been printed and no need to print the offset. - if (GeneratedFileEntry && !IncludeOffset) + // Do nothing if no need to print the offset nor the filename: + if (!IncludeOffset && GeneratedFilename) return IgnoreResults; bool PrintErr = false; // Use the location of canonical decl. D = D->getCanonicalDecl(); - if (!GeneratedFileEntry) { - GeneratedFileEntry = true; + if (!GeneratedFilename) { + GeneratedFilename = true; PrintErr = printLoc(Out, D->getBeginLoc(), Context->getSourceManager(), IncludeOffset); } else { diff --git a/clang/unittests/Index/IndexTests.cpp b/clang/unittests/Index/IndexTests.cpp index 90d47657fc5cc..8b024aec383e1 100644 --- a/clang/unittests/Index/IndexTests.cpp +++ b/clang/unittests/Index/IndexTests.cpp @@ -9,7 +9,10 @@ #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclCXX.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/ASTMatchers/ASTMatchFinder.h" +#include "clang/ASTMatchers/ASTMatchers.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/SourceManager.h" #include "clang/Frontend/CompilerInstance.h" @@ -455,7 +458,7 @@ TEST(IndexTest, ReadWriteRoles) { // Two `Holder<decltype([]{})>` instantiations have distinct closure types, // therefore they should have distinct USRs. -TEST(USRTest, NestedLambdaTagsInTemplateArgGetDistinctUSRs) { +TEST(USRTest, NestedLambdaTagsInTemplateArg) { auto AST = tooling::buildASTFromCodeWithArgs( R"cpp( template <class T> struct Holder { void reset(T*) {} }; @@ -467,24 +470,71 @@ TEST(USRTest, NestedLambdaTagsInTemplateArgGetDistinctUSRs) { {"-std=c++20"}); ASSERT_TRUE(AST); - struct Visitor : RecursiveASTVisitor<Visitor> { - std::vector<const ClassTemplateSpecializationDecl *> Specs; - bool VisitCXXMemberCallExpr(CXXMemberCallExpr *Call) { - if (auto *M = Call->getMethodDecl()) - if (auto *S = dyn_cast<ClassTemplateSpecializationDecl>(M->getParent())) - Specs.push_back(S); - return true; - } - } V; - V.TraverseAST(AST->getASTContext()); - ASSERT_EQ(V.Specs.size(), 2u); + using namespace ast_matchers; + auto Matches = match( + cxxMemberCallExpr(callee(cxxMethodDecl(ofClass( + classTemplateSpecializationDecl(hasName("Holder")).bind("spec"))))), + AST->getASTContext()); + ASSERT_EQ(Matches.size(), 2u); llvm::SmallString<128> U0, U1; - ASSERT_FALSE(generateUSRForDecl(V.Specs[0], U0)); - ASSERT_FALSE(generateUSRForDecl(V.Specs[1], U1)); + ASSERT_FALSE(generateUSRForDecl( + Matches[0].getNodeAs<ClassTemplateSpecializationDecl>("spec"), U0)); + ASSERT_FALSE(generateUSRForDecl( + Matches[1].getNodeAs<ClassTemplateSpecializationDecl>("spec"), U1)); EXPECT_NE(U0, U1) << "U0=" << U0.str() << " U1=" << U1.str(); } +// A variant of `NestedLambdaTagsInTemplateArg` that tests when the template +// specialization is a macro expansion. +TEST(USRTest, NestedLambdaTagsInTemplateArgWithTheSameSpellingLoc) { + auto AST = tooling::buildASTFromCodeWithArgs( + R"cpp( + #define MY_SPEC Holder<decltype([]{})>().reset(nullptr) + template <class T> struct Holder { void reset(T*) {} }; + void caller() { + MY_SPEC; + MY_SPEC; + } + )cpp", + {"-std=c++20"}); + ASSERT_TRUE(AST); + + using namespace ast_matchers; + auto Matches = + match(cxxMemberCallExpr(callee(cxxMethodDecl(ofClass( + classTemplateSpecializationDecl( + hasName("Holder"), + hasTemplateArgument( + 0, refersToType(hasDeclaration( + cxxRecordDecl(isLambda()).bind("closure"))))) + .bind("spec"))))), + AST->getASTContext()); + ASSERT_EQ(Matches.size(), 2u); + + const auto *CTSD1 = + Matches[0].getNodeAs<ClassTemplateSpecializationDecl>("spec"); + const auto *CTSD2 = + Matches[1].getNodeAs<ClassTemplateSpecializationDecl>("spec"); + const auto *CRD1 = Matches[0].getNodeAs<CXXRecordDecl>("closure"); + const auto *CRD2 = Matches[1].getNodeAs<CXXRecordDecl>("closure"); + + // First, check that the two specializations have distinct USR names: + llvm::SmallString<128> U0, U1; + ASSERT_FALSE(generateUSRForDecl(CTSD1, U0)); + ASSERT_FALSE(generateUSRForDecl(CTSD2, U1)); + ASSERT_NE(U0, U1) << "U0=" << U0.str() << " U1=" << U1.str(); + + // Second, check that the two lambda types have identical spellingLoc offsets + // but distinct expansionLoc offsets: + SourceManager &SM = AST->getASTContext().getSourceManager(); + + ASSERT_EQ(SM.getDecomposedLoc(SM.getSpellingLoc(CRD1->getBeginLoc())), + SM.getDecomposedLoc(SM.getSpellingLoc(CRD2->getBeginLoc()))); + ASSERT_NE(SM.getDecomposedLoc(SM.getExpansionLoc(CRD1->getBeginLoc())), + SM.getDecomposedLoc(SM.getExpansionLoc(CRD2->getBeginLoc()))); +} + } // namespace } // namespace index } // namespace clang >From ac2060640c0718bd912b06ce00cb84ad66630505 Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Wed, 24 Jun 2026 16:25:28 -0700 Subject: [PATCH 3/6] address comments --- clang/lib/UnifiedSymbolResolution/USRGeneration.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp index a3885640427a5..6508facc3fcd5 100644 --- a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp +++ b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp @@ -59,7 +59,7 @@ static bool printLoc(llvm::raw_ostream &OS, SourceLocation Loc, // Use the offest into the FileID to represent the location. Using // a line/column can cause us to look back at the original source file, // which is expensive. - OS << '@' << Decomposed.second; + printLocOffset(OS, Loc, SM); } return false; } >From 3105315ddb61a00f67545d101deb5d8c809e2e6b Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Wed, 24 Jun 2026 16:40:36 -0700 Subject: [PATCH 4/6] address comments --- .../UnifiedSymbolResolution/USRGeneration.cpp | 16 +++++++++++----- clang/unittests/Index/IndexTests.cpp | 17 +++++++++-------- 2 files changed, 20 insertions(+), 13 deletions(-) diff --git a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp index 6508facc3fcd5..5f689ad32159b 100644 --- a/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp +++ b/clang/lib/UnifiedSymbolResolution/USRGeneration.cpp @@ -30,11 +30,17 @@ static bool printLocOffset(llvm::raw_ostream &OS, SourceLocation Loc, const SourceManager &SM) { if (Loc.isInvalid()) return true; - Loc = SM.getExpansionLoc(Loc); - // Use the offest into the FileID to represent the location. Using - // a line/column can cause us to look back at the original source file, - // which is expensive. - OS << '@' << SM.getDecomposedLoc(Loc).second; + if (SM.getExpansionLoc(Loc) == SM.getSpellingLoc(Loc)) { + // Use the offest into the FileID to represent the location. Using + // a line/column can cause us to look back at the original source file, + // which is expensive. + OS << '@' << SM.getDecomposedLoc(Loc).second; + return false; + } + // In case expansion and spelling locations differ, we need both of them to + // make the USR distinguishable: + OS << '@' << SM.getDecomposedLoc(SM.getExpansionLoc(Loc)).second; + OS << '@' << SM.getDecomposedLoc(SM.getSpellingLoc(Loc)).second; return false; } diff --git a/clang/unittests/Index/IndexTests.cpp b/clang/unittests/Index/IndexTests.cpp index 8b024aec383e1..4e5e0c33d362f 100644 --- a/clang/unittests/Index/IndexTests.cpp +++ b/clang/unittests/Index/IndexTests.cpp @@ -486,15 +486,16 @@ TEST(USRTest, NestedLambdaTagsInTemplateArg) { } // A variant of `NestedLambdaTagsInTemplateArg` that tests when the template -// specialization is a macro expansion. -TEST(USRTest, NestedLambdaTagsInTemplateArgWithTheSameSpellingLoc) { +// specializations have identical expansion location but distinct spelling +// location. +TEST(USRTest, NestedLambdaTagsInTemplateArgWithSameSpellingDistinctExpansion) { auto AST = tooling::buildASTFromCodeWithArgs( R"cpp( - #define MY_SPEC Holder<decltype([]{})>().reset(nullptr) + #define MY_SPEC Holder<decltype([]{})>().reset(nullptr); \ + Holder<decltype([]{})>().reset(nullptr) template <class T> struct Holder { void reset(T*) {} }; void caller() { MY_SPEC; - MY_SPEC; } )cpp", {"-std=c++20"}); @@ -525,13 +526,13 @@ TEST(USRTest, NestedLambdaTagsInTemplateArgWithTheSameSpellingLoc) { ASSERT_FALSE(generateUSRForDecl(CTSD2, U1)); ASSERT_NE(U0, U1) << "U0=" << U0.str() << " U1=" << U1.str(); - // Second, check that the two lambda types have identical spellingLoc offsets - // but distinct expansionLoc offsets: + // Second, check that the two lambda types have distinct spellingLoc offsets + // but identical expansionLoc offsets: SourceManager &SM = AST->getASTContext().getSourceManager(); - ASSERT_EQ(SM.getDecomposedLoc(SM.getSpellingLoc(CRD1->getBeginLoc())), + ASSERT_NE(SM.getDecomposedLoc(SM.getSpellingLoc(CRD1->getBeginLoc())), SM.getDecomposedLoc(SM.getSpellingLoc(CRD2->getBeginLoc()))); - ASSERT_NE(SM.getDecomposedLoc(SM.getExpansionLoc(CRD1->getBeginLoc())), + ASSERT_EQ(SM.getDecomposedLoc(SM.getExpansionLoc(CRD1->getBeginLoc())), SM.getDecomposedLoc(SM.getExpansionLoc(CRD2->getBeginLoc()))); } >From 4c3411a8cb0da53a4445ce5e34c531276e55d78d Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Thu, 25 Jun 2026 12:18:30 -0700 Subject: [PATCH 5/6] Merge main and make change to a dependent test --- .../Scalable/PointerFlow/entity-name-no-conflict.cpp | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp b/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp index e190ee264cb7d..c045e732ea6c1 100644 --- a/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp +++ b/clang/test/Analysis/Scalable/PointerFlow/entity-name-no-conflict.cpp @@ -4,7 +4,7 @@ // RUN: --ssaf-extract-summaries=PointerFlow \ // RUN: --ssaf-tu-summary-file=%t/tu.summary.json \ // RUN: --ssaf-compilation-unit-id="tu-1" \ -// RUN: -mllvm -debug-only=ssaf-analyses 2>&1 | FileCheck %s +// RUN: -mllvm -debug-only=ssaf-analyses 2>&1 | FileCheck %s --allow-empty // The two `Holder<decltype([]{})>` instantiations are distinct types @@ -12,8 +12,7 @@ // currently fails to distinguish them. -// CHECK: dropping duplicate PointerFlow summary -// FIXME: change to CHECK-NOT once the bug gets fixed +// CHECK-NOT: dropping duplicate PointerFlow summary template <class T> struct Holder { >From 62ba9ba9981c5f885a1e48b1f9c30373cd3983bb Mon Sep 17 00:00:00 2001 From: Ziqing Luo <[email protected]> Date: Thu, 25 Jun 2026 13:08:45 -0700 Subject: [PATCH 6/6] Update clang/unittests/Index/IndexTests.cpp MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Balázs Benics <[email protected]> --- clang/unittests/Index/IndexTests.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/unittests/Index/IndexTests.cpp b/clang/unittests/Index/IndexTests.cpp index 4e5e0c33d362f..e20764e4fb9d9 100644 --- a/clang/unittests/Index/IndexTests.cpp +++ b/clang/unittests/Index/IndexTests.cpp @@ -528,7 +528,7 @@ TEST(USRTest, NestedLambdaTagsInTemplateArgWithSameSpellingDistinctExpansion) { // Second, check that the two lambda types have distinct spellingLoc offsets // but identical expansionLoc offsets: - SourceManager &SM = AST->getASTContext().getSourceManager(); + const SourceManager &SM = AST->getASTContext().getSourceManager(); ASSERT_NE(SM.getDecomposedLoc(SM.getSpellingLoc(CRD1->getBeginLoc())), SM.getDecomposedLoc(SM.getSpellingLoc(CRD2->getBeginLoc()))); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
