[clang-tools-extra] [clang-tidy][NFC] Add `findTokenInRange` and reuse it (PR #183941)

Daniil Dudkin via cfe-commits Sun, 01 Mar 2026 05:29:04 -0800

https://github.com/unterumarmung updated 
https://github.com/llvm/llvm-project/pull/183941


>From a77530cbce6f6ec15b491fe33b89259596625415 Mon Sep 17 00:00:00 2001
From: Daniil Dudkin <[email protected]>
Date: Sat, 28 Feb 2026 22:08:22 +0300
Subject: [PATCH] [clang-tidy][NFC] Add findTokenInRange and reuse it in
 ExplicitConstructorCheck

---
 .../google/ExplicitConstructorCheck.cpp       |  43 +---
 .../clang-tidy/utils/LexerUtils.cpp           |  54 +++++
 .../clang-tidy/utils/LexerUtils.h             |   9 +
 .../unittests/clang-tidy/LexerUtilsTest.cpp   | 204 ++++++++++++++++++
 4 files changed, 276 insertions(+), 34 deletions(-)

diff --git a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
index ac604b7b9f1b4..2c64c97a2e95d 100644
--- a/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
+++ b/clang-tools-extra/clang-tidy/google/ExplicitConstructorCheck.cpp
@@ -7,10 +7,10 @@
 
//===----------------------------------------------------------------------===//
 
 #include "ExplicitConstructorCheck.h"
+#include "../utils/LexerUtils.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/ASTMatchers/ASTMatchFinder.h"
 #include "clang/ASTMatchers/ASTMatchers.h"
-#include "clang/Lex/Lexer.h"
 
 using namespace clang::ast_matchers;
 
@@ -31,32 +31,6 @@ void ExplicitConstructorCheck::registerMatchers(MatchFinder *Finder) {
       this);
 }
 
-// Looks for the token matching the predicate and returns the range of the found
-// token including trailing whitespace.
-static SourceRange findToken(const SourceManager &Sources,
-                             const LangOptions &LangOpts,
-                             SourceLocation StartLoc, SourceLocation EndLoc,
-                             bool (*Pred)(const Token &)) {
-  if (StartLoc.isMacroID() || EndLoc.isMacroID())
-    return {};
-  const FileID File = Sources.getFileID(Sources.getSpellingLoc(StartLoc));
-  const StringRef Buf = Sources.getBufferData(File);
-  const char *StartChar = Sources.getCharacterData(StartLoc);
-  Lexer Lex(StartLoc, LangOpts, StartChar, StartChar, Buf.end());
-  Lex.SetCommentRetentionState(true);
-  Token Tok;
-  do {
-    Lex.LexFromRawLexer(Tok);
-    if (Pred(Tok)) {
-      Token NextTok;
-      Lex.LexFromRawLexer(NextTok);
-      return {Tok.getLocation(), NextTok.getLocation()};
-    }
-  } while (Tok.isNot(tok::eof) && Tok.getLocation() < EndLoc);
-
-  return {};
-}
-
 static bool declIsStdInitializerList(const NamedDecl *D) {
   // First use the fast getName() method to avoid unnecessary calls to the
   // slow getQualifiedNameAsString().
@@ -113,9 +87,12 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) {
       return Tok.is(tok::raw_identifier) &&
              Tok.getRawIdentifier() == "explicit";
     };
-    const SourceRange ExplicitTokenRange =
-        findToken(*Result.SourceManager, getLangOpts(),
-                  Ctor->getOuterLocStart(), Ctor->getEndLoc(), IsKwExplicit);
+    const CharSourceRange ConstructorRange = CharSourceRange::getTokenRange(
+        Ctor->getOuterLocStart(), Ctor->getEndLoc());
+    const CharSourceRange ExplicitTokenRange =
+        utils::lexer::findTokenTextInRange(ConstructorRange,
+                                           *Result.SourceManager, getLangOpts(),
+                                           IsKwExplicit);
     StringRef ConstructorDescription;
     if (Ctor->isMoveConstructor())
       ConstructorDescription = "move";
@@ -127,10 +104,8 @@ void ExplicitConstructorCheck::check(const MatchFinder::MatchResult &Result) {
     auto Diag = diag(Ctor->getLocation(),
                      "%0 constructor should not be declared explicit")
                 << ConstructorDescription;
-    if (ExplicitTokenRange.isValid()) {
-      Diag << FixItHint::CreateRemoval(
-          CharSourceRange::getCharRange(ExplicitTokenRange));
-    }
+    if (ExplicitTokenRange.isValid())
+      Diag << FixItHint::CreateRemoval(ExplicitTokenRange);
     return;
   }
 
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
index a9a8c7bbf4c89..6d9e4d2ce6f83 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp
@@ -160,6 +160,60 @@ getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
   return Comments;
 }
 
+CharSourceRange
+findTokenTextInRange(CharSourceRange Range, const SourceManager &SM,
+                     const LangOptions &LangOpts,
+                     llvm::function_ref<bool(const Token &)> Pred) {
+  if (Range.isInvalid())
+    return {};
+
+  // Normalize to a file-based char range so raw lexing can operate on one
+  // contiguous buffer and reject unmappable (e.g. macro) ranges.
+  const CharSourceRange FileRange =
+      Lexer::makeFileCharRange(Range, SM, LangOpts);
+  if (FileRange.isInvalid())
+    return {};
+
+  const auto [BeginFID, BeginOffset] =
+      SM.getDecomposedLoc(FileRange.getBegin());
+  const auto [EndFID, EndOffset] = SM.getDecomposedLoc(FileRange.getEnd());
+  if (BeginFID != EndFID || BeginOffset > EndOffset)
+    return {};
+
+  bool Invalid = false;
+  const StringRef Buffer = SM.getBufferData(BeginFID, &Invalid);
+  if (Invalid)
+    return {};
+
+  const char *LexStart = Buffer.data() + BeginOffset;
+  // Re-lex raw tokens in the bounded file buffer while preserving comments so
+  // callers can match tokens regardless of interleaved comments.
+  Lexer TheLexer(SM.getLocForStartOfFile(BeginFID), LangOpts, Buffer.begin(),
+                 LexStart, Buffer.end());
+  TheLexer.SetCommentRetentionState(true);
+
+  while (true) {
+    Token Tok;
+    // LexFromRawLexer() returns true when the lexer reached the buffer end,
+    // even though Tok is still a real token; stop on tok::eof instead.
+    TheLexer.LexFromRawLexer(Tok);
+
+    if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() ||
+        SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation()))
+      return {};
+
+    if (!Pred(Tok))
+      continue;
+
+    // End the char range at the next token start (the eof token when the match
+    // is last in the buffer) so trailing trivia is covered for fix-it removals.
+    Token NextTok;
+    TheLexer.LexFromRawLexer(NextTok);
+    return CharSourceRange::getCharRange(Tok.getLocation(),
+                                         NextTok.getLocation());
+  }
+}
+
 std::optional<Token> getQualifyingToken(tok::TokenKind TK,
                                         CharSourceRange Range,
                                         const ASTContext &Context,
diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
index 38123ae14cff7..7accfc7748648 100644
--- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h
+++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h
@@ -12,6 +12,7 @@
 #include "clang/AST/ASTContext.h"
 #include "clang/Basic/TokenKinds.h"
 #include "clang/Lex/Lexer.h"
+#include "llvm/ADT/STLFunctionalExtras.h"
 #include <optional>
 #include <utility>
 #include <vector>
@@ -126,6 +127,14 @@ std::vector<CommentToken>
 getTrailingCommentsInRange(CharSourceRange Range, const SourceManager &SM,
                            const LangOptions &LangOpts);
 
+/// Returns the char range of the first raw token in \p Range matching \p Pred,
+/// extended to the start of the following token so trailing trivia is covered.
+/// Returns an invalid range if no token matches or \p Range is unmappable.
+CharSourceRange
+findTokenTextInRange(CharSourceRange Range, const SourceManager &SM,
+                     const LangOptions &LangOpts,
+                     llvm::function_ref<bool(const Token &)> Pred);
+
 /// Assuming that ``Range`` spans a CVR-qualified type, returns the
 /// token in ``Range`` that is responsible for the qualification. ``Range``
 /// must be valid with respect to ``SM``.  Returns ``std::nullopt`` if no
diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
index 438a78b4694ee..d721fcddf4c79 100644
--- a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
+++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp
@@ -8,6 +8,7 @@
 
 #include "../clang-tidy/utils/LexerUtils.h"
 
+#include "clang/AST/DeclCXX.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Frontend/ASTUnit.h"
@@ -41,8 +42,211 @@ static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A,
   return CharSourceRange::getCharRange(Begin, End);
 }
 
+static bool isRawIdentifierNamed(const Token &Tok, StringRef Name) {
+  return Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == Name;
+}
+
 namespace {
 
+TEST(LexerUtilsTest, FindTokenTextInRangeFindsMatch) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[explicit   ]] S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  ASSERT_TRUE(MatchedRange.isValid());
+
+  const StringRef CodeText = Code.code();
+  const size_t ExplicitOffset = CodeText.find("explicit");
+  ASSERT_NE(StringRef::npos, ExplicitOffset);
+  const size_t ConstructorOffset = CodeText.find("S(int)");
+  ASSERT_NE(StringRef::npos, ConstructorOffset);
+  EXPECT_EQ(ExplicitOffset, SM.getFileOffset(MatchedRange.getBegin()));
+  EXPECT_EQ(ConstructorOffset, SM.getFileOffset(MatchedRange.getEnd()));
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidWhenNotFound) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[int x = 0;]]
+  S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeDoesNotMatchTokenAtEndBoundary) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[int x = 0; ]]explicit S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest,
+     FindTokenTextInRangeReturnsInvalidWhenPredicateNeverMatches) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $range[[explicit ]] S(int);
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange SearchRange =
+      rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range");
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts, [](const Token &) { return false; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForInvalidRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST("struct S { explicit S(int); };");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      CharSourceRange(), SM, LangOpts, [](const Token &) { return true; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForReversedOffsets) {
+  llvm::Annotations Code(R"cpp(
+struct S {
+  $a^explicit S(int);$b^
+};
+)cpp");
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code());
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const SourceLocation MainFileStart =
+      SM.getLocForStartOfFile(SM.getMainFileID());
+  const SourceLocation Begin = MainFileStart.getLocWithOffset(Code.point("b"));
+  const SourceLocation End = MainFileStart.getLocWithOffset(Code.point("a"));
+  ASSERT_TRUE(SM.isBeforeInTranslationUnit(End, Begin));
+
+  const CharSourceRange ReversedRange =
+      CharSourceRange::getCharRange(Begin, End);
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      ReversedRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidWhenFileRangeIsInvalid) {
+  llvm::Annotations Code(R"cpp(
+#include "header.h"
+int $begin^main_var = 0;
+)cpp");
+  const FileContentMappings Mappings = {
+      {"header.h", "int header_var = 0;\n"},
+  };
+  std::unique_ptr<ASTUnit> AST = buildAST(Code.code(), Mappings);
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const SourceLocation MainFileStart =
+      SM.getLocForStartOfFile(SM.getMainFileID());
+  const SourceLocation Begin =
+      MainFileStart.getLocWithOffset(Code.point("begin"));
+  ASSERT_TRUE(Begin.isFileID());
+
+  auto HeaderFile = AST->getFileManager().getOptionalFileRef("header.h");
+  ASSERT_TRUE(HeaderFile.has_value());
+  const FileID HeaderFID = SM.translateFile(*HeaderFile);
+  ASSERT_TRUE(HeaderFID.isValid());
+  const SourceLocation HeaderBegin = SM.getLocForStartOfFile(HeaderFID);
+  ASSERT_TRUE(HeaderBegin.isFileID());
+
+  const CharSourceRange SearchRange =
+      CharSourceRange::getCharRange(Begin, HeaderBegin);
+  const CharSourceRange FileRange =
+      Lexer::makeFileCharRange(SearchRange, SM, LangOpts);
+  EXPECT_TRUE(FileRange.isInvalid());
+
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts, [](const Token &) { return true; });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
+TEST(LexerUtilsTest, FindTokenTextInRangeReturnsInvalidForMacroRange) {
+  std::unique_ptr<ASTUnit> AST = buildAST(R"cpp(
+#define EXPLICIT explicit
+struct S {
+  EXPLICIT S(int);
+};
+)cpp");
+  ASSERT_TRUE(AST);
+  const ASTContext &Context = AST->getASTContext();
+  const SourceManager &SM = Context.getSourceManager();
+  const LangOptions &LangOpts = Context.getLangOpts();
+
+  const CXXConstructorDecl *Ctor = [&Context] {
+    for (const Decl *D : Context.getTranslationUnitDecl()->decls()) {
+      const auto *RD = dyn_cast<CXXRecordDecl>(D);
+      if (!RD)
+        continue;
+      for (const CXXConstructorDecl *Ctor : RD->ctors())
+        if (!Ctor->isImplicit())
+          return Ctor;
+    }
+    return static_cast<const CXXConstructorDecl *>(nullptr);
+  }();
+  ASSERT_NE(nullptr, Ctor);
+  ASSERT_TRUE(Ctor->getOuterLocStart().isMacroID());
+
+  const CharSourceRange SearchRange = CharSourceRange::getTokenRange(
+      Ctor->getOuterLocStart(), Ctor->getEndLoc());
+  const CharSourceRange MatchedRange = utils::lexer::findTokenTextInRange(
+      SearchRange, SM, LangOpts,
+      [](const Token &Tok) { return isRawIdentifierNamed(Tok, "explicit"); });
+  EXPECT_TRUE(MatchedRange.isInvalid());
+}
+
 TEST(LexerUtilsTest, GetTrailingCommentsInRangeAdjacentComments) {
   llvm::Annotations Code(R"cpp(
 void f() {

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang-tools-extra] [clang-tidy][NFC] Add `findTokenInRange` and reuse it (PR #183941)

Reply via email to