https://github.com/unterumarmung updated https://github.com/llvm/llvm-project/pull/180366
>From 0d5296d6c7caf24229803e03485864897aad9e87 Mon Sep 17 00:00:00 2001 From: Daniil Dudkin <[email protected]> Date: Sat, 7 Feb 2026 22:46:24 +0300 Subject: [PATCH] [clang-tidy] Preserve typedef comments in `modernize-use-using` Keep comment blocks between typedef type and name by analyzing the raw lexer range, and avoid injecting unrelated tokens into the replacement. Reuse comment-trivia handling from `bugprone-argument-comment` and add unit tests for selected `LexerUtils` functions while extending `modernize-use-using` regression coverage. Fixes https://github.com/llvm/llvm-project/issues/159518. --- .../bugprone/ArgumentCommentCheck.cpp | 70 +--- .../clang-tidy/modernize/UseUsingCheck.cpp | 239 +++++++++++--- .../clang-tidy/utils/LexerUtils.cpp | 147 ++++++++- .../clang-tidy/utils/LexerUtils.h | 28 ++ clang-tools-extra/docs/ReleaseNotes.rst | 3 +- .../checkers/modernize/use-using.cpp | 69 +++- .../unittests/clang-tidy/CMakeLists.txt | 1 + .../unittests/clang-tidy/LexerUtilsTest.cpp | 311 ++++++++++++++++++ 8 files changed, 758 insertions(+), 110 deletions(-) create mode 100644 clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index d46896808bd09..a94daf4d15357 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -77,55 +77,9 @@ void ArgumentCommentCheck::registerMatchers(MatchFinder *Finder) { this); } -static std::vector<std::pair<SourceLocation, StringRef>> -getCommentsInRange(ASTContext *Ctx, CharSourceRange Range) { - std::vector<std::pair<SourceLocation, StringRef>> Comments; - auto &SM = Ctx->getSourceManager(); - const std::pair<FileID, unsigned> BeginLoc = - SM.getDecomposedLoc(Range.getBegin()), - EndLoc = - SM.getDecomposedLoc(Range.getEnd()); - - if (BeginLoc.first != EndLoc.first) - return Comments; - - bool Invalid = false; - const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); - if (Invalid) - return Comments; - - const char *StrData = Buffer.data() + BeginLoc.second; - - Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), Ctx->getLangOpts(), - Buffer.begin(), StrData, Buffer.end()); - TheLexer.SetCommentRetentionState(true); - - while (true) { - Token Tok; - if (TheLexer.LexFromRawLexer(Tok)) - break; - if (Tok.getLocation() == Range.getEnd() || Tok.is(tok::eof)) - break; - - if (Tok.is(tok::comment)) { - const std::pair<FileID, unsigned> CommentLoc = - SM.getDecomposedLoc(Tok.getLocation()); - assert(CommentLoc.first == BeginLoc.first); - Comments.emplace_back( - Tok.getLocation(), - StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength())); - } else { - // Clear comments found before the different token, e.g. comma. - Comments.clear(); - } - } - - return Comments; -} - -static std::vector<std::pair<SourceLocation, StringRef>> +static std::vector<utils::lexer::CommentToken> getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) { - std::vector<std::pair<SourceLocation, StringRef>> Comments; + std::vector<utils::lexer::CommentToken> Comments; while (Loc.isValid()) { const std::optional<Token> Tok = utils::lexer::getPreviousToken( Loc, Ctx->getSourceManager(), Ctx->getLangOpts(), @@ -133,11 +87,12 @@ getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) { if (!Tok || Tok->isNot(tok::comment)) break; Loc = Tok->getLocation(); - Comments.emplace_back( + Comments.emplace_back(utils::lexer::CommentToken{ Loc, Lexer::getSourceText(CharSourceRange::getCharRange( Loc, Loc.getLocWithOffset(Tok->getLength())), - Ctx->getSourceManager(), Ctx->getLangOpts())); + Ctx->getSourceManager(), Ctx->getLangOpts()), + }); } return Comments; } @@ -304,9 +259,10 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx, MakeFileCharRange(ArgBeginLoc, Args[I]->getBeginLoc()); ArgBeginLoc = Args[I]->getEndLoc(); - std::vector<std::pair<SourceLocation, StringRef>> Comments; + std::vector<utils::lexer::CommentToken> Comments; if (BeforeArgument.isValid()) { - Comments = getCommentsInRange(Ctx, BeforeArgument); + Comments = utils::lexer::getCommentsInRange( + BeforeArgument, Ctx->getSourceManager(), Ctx->getLangOpts()); } else { // Fall back to parsing back from the start of the argument. const CharSourceRange ArgsRange = @@ -314,18 +270,18 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx, Comments = getCommentsBeforeLoc(Ctx, ArgsRange.getBegin()); } - for (auto Comment : Comments) { + for (const auto &Comment : Comments) { llvm::SmallVector<StringRef, 2> Matches; - if (IdentRE.match(Comment.second, &Matches) && + if (IdentRE.match(Comment.Text, &Matches) && !sameName(Matches[2], II->getName(), StrictMode)) { { const DiagnosticBuilder Diag = - diag(Comment.first, "argument name '%0' in comment does not " - "match parameter name %1") + diag(Comment.Loc, "argument name '%0' in comment does not " + "match parameter name %1") << Matches[2] << II; if (isLikelyTypo(Callee->parameters(), Matches[2], II->getName())) { Diag << FixItHint::CreateReplacement( - Comment.first, (Matches[1] + II->getName() + Matches[3]).str()); + Comment.Loc, (Matches[1] + II->getName() + Matches[3]).str()); } } diag(PVD->getLocation(), "%0 declared here", DiagnosticIDs::Note) << II; diff --git a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp index 4d9b5ee568d80..cc467407649b2 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp @@ -26,6 +26,129 @@ AST_MATCHER(clang::LinkageSpecDecl, isExternCLinkage) { namespace clang::tidy::modernize { +namespace lexer = clang::tidy::utils::lexer; + +namespace { +struct RangeTextInfo { + std::string Text; + lexer::TokenRangeInfo Tokens; +}; +} // namespace + +static bool hasNonWhitespace(llvm::StringRef Text) { + return Text.find_first_not_of(" \t\n\r\f\v") != llvm::StringRef::npos; +} + +static RangeTextInfo getRangeTextInfo(clang::SourceLocation Begin, + clang::SourceLocation End, + const clang::SourceManager &SM, + const clang::LangOptions &LangOpts) { + RangeTextInfo Info; + if (!Begin.isValid() || !End.isValid() || Begin.isMacroID() || + End.isMacroID()) + return Info; + + const clang::CharSourceRange Range = + clang::CharSourceRange::getCharRange(Begin, End); + Info.Text = lexer::getSourceText(Range, SM, LangOpts); + Info.Tokens = lexer::analyzeTokenRange(Range, SM, LangOpts); + return Info; +} + +static std::string getFunctionPointerTypeText(clang::SourceRange TypeRange, + clang::SourceLocation NameLoc, + const clang::SourceManager &SM, + const clang::LangOptions &LO) { + clang::SourceLocation StartLoc = NameLoc; + clang::SourceLocation EndLoc = NameLoc; + + while (true) { + const std::optional<clang::Token> Prev = + lexer::getPreviousToken(StartLoc, SM, LO); + const std::optional<clang::Token> Next = + lexer::findNextTokenSkippingComments(EndLoc, SM, LO); + if (!Prev || Prev->isNot(clang::tok::l_paren) || !Next || + Next->isNot(clang::tok::r_paren)) + break; + + StartLoc = Prev->getLocation(); + EndLoc = Next->getLocation(); + } + + const clang::CharSourceRange RangeLeftOfIdentifier = + clang::CharSourceRange::getCharRange(TypeRange.getBegin(), StartLoc); + const clang::CharSourceRange RangeRightOfIdentifier = + clang::CharSourceRange::getCharRange( + clang::Lexer::getLocForEndOfToken(EndLoc, 0, SM, LO), + clang::Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO)); + return lexer::getSourceText(RangeLeftOfIdentifier, SM, LO) + + lexer::getSourceText(RangeRightOfIdentifier, SM, LO); +} + +static RangeTextInfo getLeadingTextInfo(bool IsFirstTypedefInGroup, + clang::SourceRange ReplaceRange, + clang::SourceRange TypeRange, + const clang::SourceManager &SM, + const clang::LangOptions &LO) { + RangeTextInfo Info; + if (!IsFirstTypedefInGroup) + return Info; + + const clang::SourceLocation TypedefEnd = + clang::Lexer::getLocForEndOfToken(ReplaceRange.getBegin(), 0, SM, LO); + Info = getRangeTextInfo(TypedefEnd, TypeRange.getBegin(), SM, LO); + // Keep leading trivia only when it actually contains comments. This avoids + // shifting plain whitespace from between 'typedef' and the type into the + // replacement, preserving formatting for un-commented declarations. + if (!Info.Tokens.HasComment) + Info.Text.clear(); + return Info; +} + +static RangeTextInfo getSuffixTextInfo(bool FunctionPointerCase, + bool IsFirstTypedefInGroup, + clang::SourceLocation PrevReplacementEnd, + clang::SourceRange TypeRange, + clang::SourceLocation NameLoc, + const clang::SourceManager &SM, + const clang::LangOptions &LO) { + RangeTextInfo Info; + if (FunctionPointerCase) + return Info; + + // Capture the raw text between type and name to preserve trailing comments, + // including multi-line // blocks, without re-lexing individual comment + // tokens. + if (IsFirstTypedefInGroup) { + const clang::SourceLocation AfterType = + clang::Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO); + return getRangeTextInfo(AfterType, NameLoc, SM, LO); + } + + if (!PrevReplacementEnd.isValid() || PrevReplacementEnd.isMacroID()) + return Info; + + clang::SourceLocation AfterComma = PrevReplacementEnd; + if (const std::optional<clang::Token> NextTok = + lexer::findNextTokenSkippingComments(AfterComma, SM, LO)) { + if (NextTok->is(clang::tok::comma)) { + AfterComma = + clang::Lexer::getLocForEndOfToken(NextTok->getLocation(), 0, SM, LO); + } + } + return getRangeTextInfo(AfterComma, NameLoc, SM, LO); +} + +static void stripLeadingComma(RangeTextInfo &Info) { + if (Info.Text.empty()) + return; + // Overlapping ranges in multi-declarator typedefs can leave a leading comma + // in the captured suffix. Drop it so the replacement doesn't reintroduce it. + const size_t NonWs = Info.Text.find_first_not_of(" \t\n\r\f\v"); + if (NonWs != std::string::npos && Info.Text[NonWs] == ',') + Info.Text.erase(0, NonWs + 1); +} + static constexpr StringRef ExternCDeclName = "extern-c-decl"; static constexpr StringRef ParentDeclName = "parent-decl"; static constexpr StringRef TagDeclName = "tag-decl"; @@ -130,69 +253,59 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { const TypeLoc TL = MatchedDecl->getTypeSourceInfo()->getTypeLoc(); - bool FunctionPointerCase = false; - auto [Type, QualifierStr] = [MatchedDecl, this, &TL, &FunctionPointerCase, - &SM, - &LO]() -> std::pair<std::string, std::string> { - SourceRange TypeRange = TL.getSourceRange(); + struct TypeInfo { + SourceRange Range; + bool FunctionPointerCase = false; + std::string Type; + std::string Qualifier; + }; + + const TypeInfo TI = [&] { + TypeInfo Info; + Info.Range = TL.getSourceRange(); // Function pointer case, get the left and right side of the identifier // without the identifier. - if (TypeRange.fullyContains(MatchedDecl->getLocation())) { - FunctionPointerCase = true; - SourceLocation StartLoc = MatchedDecl->getLocation(); - SourceLocation EndLoc = MatchedDecl->getLocation(); - - while (true) { - const std::optional<Token> Prev = - utils::lexer::getPreviousToken(StartLoc, SM, LO); - const std::optional<Token> Next = - utils::lexer::findNextTokenSkippingComments(EndLoc, SM, LO); - if (!Prev || Prev->isNot(tok::l_paren) || !Next || - Next->isNot(tok::r_paren)) - break; - - StartLoc = Prev->getLocation(); - EndLoc = Next->getLocation(); - } - - const auto RangeLeftOfIdentifier = - CharSourceRange::getCharRange(TypeRange.getBegin(), StartLoc); - const auto RangeRightOfIdentifier = CharSourceRange::getCharRange( - Lexer::getLocForEndOfToken(EndLoc, 0, SM, LO), - Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO)); - const std::string VerbatimType = - (Lexer::getSourceText(RangeLeftOfIdentifier, SM, LO) + - Lexer::getSourceText(RangeRightOfIdentifier, SM, LO)) - .str(); - return {VerbatimType, ""}; + if (Info.Range.fullyContains(MatchedDecl->getLocation())) { + Info.FunctionPointerCase = true; + Info.Type = getFunctionPointerTypeText( + Info.Range, MatchedDecl->getLocation(), SM, LO); + return Info; } - StringRef ExtraReference = ""; - if (MainTypeEndLoc.isValid() && TypeRange.fullyContains(MainTypeEndLoc)) { + std::string ExtraReference; + if (MainTypeEndLoc.isValid() && Info.Range.fullyContains(MainTypeEndLoc)) { // Each type introduced in a typedef can specify being a reference or // pointer type separately, so we need to figure out if the new using-decl // needs to be to a reference or pointer as well. - const SourceLocation Tok = utils::lexer::findPreviousAnyTokenKind( + const SourceLocation Tok = lexer::findPreviousAnyTokenKind( MatchedDecl->getLocation(), SM, LO, tok::TokenKind::star, tok::TokenKind::amp, tok::TokenKind::comma, tok::TokenKind::kw_typedef); - ExtraReference = Lexer::getSourceText( + ExtraReference = lexer::getSourceText( CharSourceRange::getCharRange(Tok, Tok.getLocWithOffset(1)), SM, LO); if (ExtraReference != "*" && ExtraReference != "&") ExtraReference = ""; - TypeRange.setEnd(MainTypeEndLoc); + Info.Range.setEnd(MainTypeEndLoc); } - return { - Lexer::getSourceText(CharSourceRange::getTokenRange(TypeRange), SM, LO) - .str(), - ExtraReference.str()}; + + Info.Type = lexer::getSourceText(CharSourceRange::getTokenRange(Info.Range), + SM, LO); + Info.Qualifier = ExtraReference; + return Info; }(); + + const SourceRange TypeRange = TI.Range; + const bool FunctionPointerCase = TI.FunctionPointerCase; + std::string Type = TI.Type; + const std::string QualifierStr = TI.Qualifier; const StringRef Name = MatchedDecl->getName(); + const SourceLocation NameLoc = MatchedDecl->getLocation(); SourceRange ReplaceRange = MatchedDecl->getSourceRange(); + const SourceLocation PrevReplacementEnd = LastReplacementEnd; // typedefs with multiple comma-separated definitions produce multiple // consecutive TypedefDecl nodes whose SourceRanges overlap. Each range starts @@ -201,10 +314,13 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { // But also we need to check that the ranges belong to the same file because // different files may contain overlapping ranges. std::string Using = "using "; - if (ReplaceRange.getBegin().isMacroID() || + const bool IsFirstTypedefInGroup = + ReplaceRange.getBegin().isMacroID() || (Result.SourceManager->getFileID(ReplaceRange.getBegin()) != Result.SourceManager->getFileID(LastReplacementEnd)) || - (ReplaceRange.getBegin() >= LastReplacementEnd)) { + (ReplaceRange.getBegin() >= LastReplacementEnd); + + if (IsFirstTypedefInGroup) { // This is the first (and possibly the only) TypedefDecl in a typedef. Save // Type and Name in case we find subsequent TypedefDecl's in this typedef. FirstTypedefType = Type; @@ -223,6 +339,27 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { Type = FirstTypedefName; } + const RangeTextInfo LeadingTextInfo = getLeadingTextInfo( + IsFirstTypedefInGroup, ReplaceRange, TypeRange, SM, LO); + RangeTextInfo SuffixTextInfo = + getSuffixTextInfo(FunctionPointerCase, IsFirstTypedefInGroup, + PrevReplacementEnd, TypeRange, NameLoc, SM, LO); + if (!IsFirstTypedefInGroup) + stripLeadingComma(SuffixTextInfo); + + const bool SuffixHasComment = SuffixTextInfo.Tokens.HasComment; + std::string SuffixText; + if (SuffixHasComment) { + SuffixText = SuffixTextInfo.Text; + } else if (QualifierStr.empty() && hasNonWhitespace(SuffixTextInfo.Text) && + SuffixTextInfo.Tokens.HasPointerOrRef && + !SuffixTextInfo.Tokens.HasIdentifier) { + // Only keep non-comment suffix text when it's purely pointer/ref trivia. + // This avoids accidentally pulling in keywords like 'typedef'. + SuffixText = SuffixTextInfo.Text; + } + const std::string QualifierText = SuffixHasComment ? "" : QualifierStr; + if (!ReplaceRange.getEnd().isMacroID()) { const SourceLocation::IntTy Offset = FunctionPointerCase ? 0 : Name.size(); LastReplacementEnd = ReplaceRange.getEnd().getLocWithOffset(Offset); @@ -235,14 +372,20 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { if (LastTagDeclRange != LastTagDeclRanges.end() && LastTagDeclRange->second.isValid() && ReplaceRange.fullyContains(LastTagDeclRange->second)) { - Type = std::string(Lexer::getSourceText( - CharSourceRange::getTokenRange(LastTagDeclRange->second), SM, LO)); + Type = lexer::getSourceText( + CharSourceRange::getTokenRange(LastTagDeclRange->second), SM, LO); if (Type.empty()) return; } - const std::string Replacement = - (Using + Name + " = " + Type + QualifierStr).str(); + std::string TypeExpr = + LeadingTextInfo.Text + Type + QualifierText + SuffixText; + TypeExpr = StringRef(TypeExpr).rtrim(" \t").str(); + StringRef Assign = " = "; + if (!TypeExpr.empty() && + (TypeExpr.front() == ' ' || TypeExpr.front() == '\t')) + Assign = " ="; + const std::string Replacement = (Using + Name + Assign + TypeExpr).str(); Diag << FixItHint::CreateReplacement(ReplaceRange, Replacement); } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index b77d985b76d77..5897f503e80fc 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -10,6 +10,7 @@ #include "clang/Basic/SourceManager.h" #include <optional> #include <utility> +#include <vector> namespace clang::tidy::utils::lexer { @@ -99,6 +100,146 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range, return false; } +std::vector<CommentToken> getCommentsInRange(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts) { + std::vector<CommentToken> Comments; + if (!Range.isValid()) + return Comments; + + const CharSourceRange FileRange = + Lexer::makeFileCharRange(Range, SM, LangOpts); + if (!FileRange.isValid()) + return Comments; + + const std::pair<FileID, unsigned> BeginLoc = + SM.getDecomposedLoc(FileRange.getBegin()); + const std::pair<FileID, unsigned> EndLoc = + SM.getDecomposedLoc(FileRange.getEnd()); + + if (BeginLoc.first != EndLoc.first) + return Comments; + + bool Invalid = false; + const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); + if (Invalid) + return Comments; + + const char *StrData = Buffer.data() + BeginLoc.second; + + Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), LangOpts, + Buffer.begin(), StrData, Buffer.end()); + // Use raw lexing with comment retention so we can see comment tokens without + // preprocessing or macro expansion effects. + TheLexer.SetCommentRetentionState(true); + + while (true) { + Token Tok; + if (TheLexer.LexFromRawLexer(Tok)) + break; + if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() || + SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation())) + break; + + if (Tok.is(tok::comment)) { + const std::pair<FileID, unsigned> CommentLoc = + SM.getDecomposedLoc(Tok.getLocation()); + assert(CommentLoc.first == BeginLoc.first); + Comments.push_back({ + Tok.getLocation(), + StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()), + }); + } else { + // Clear comments found before the different token, e.g. comma. Callers + // use this to retrieve only the contiguous comment block that directly + // precedes a token of interest. + Comments.clear(); + } + } + + return Comments; +} + +std::string getSourceText(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts) { + if (!Range.isValid()) + return {}; + + const CharSourceRange FileRange = + Lexer::makeFileCharRange(Range, SM, LangOpts); + if (!FileRange.isValid()) + return {}; + + bool Invalid = false; + const StringRef Text = + Lexer::getSourceText(FileRange, SM, LangOpts, &Invalid); + if (Invalid) + return {}; + return Text.str(); +} + +TokenRangeInfo analyzeTokenRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts) { + TokenRangeInfo Info; + if (!Range.isValid()) + return Info; + + const CharSourceRange FileRange = + Lexer::makeFileCharRange(Range, SM, LangOpts); + if (!FileRange.isValid()) + return Info; + + const std::pair<FileID, unsigned> BeginLoc = + SM.getDecomposedLoc(FileRange.getBegin()); + const std::pair<FileID, unsigned> EndLoc = + SM.getDecomposedLoc(FileRange.getEnd()); + + if (BeginLoc.first != EndLoc.first) + return Info; + + bool Invalid = false; + const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); + if (Invalid) + return Info; + + const char *StrData = Buffer.data() + BeginLoc.second; + + Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), LangOpts, + Buffer.begin(), StrData, Buffer.end()); + // Use raw lexing with comment retention to capture comment tokens even + // though they are not part of the AST. + TheLexer.SetCommentRetentionState(true); + + while (true) { + Token Tok; + if (TheLexer.LexFromRawLexer(Tok)) + break; + + // Stop once we leave the requested file range. + if (Tok.is(tok::eof) || Tok.getLocation() == FileRange.getEnd() || + SM.isBeforeInTranslationUnit(FileRange.getEnd(), Tok.getLocation())) + break; + + if (Tok.is(tok::comment)) { + Info.HasComment = true; + continue; + } + + if (Tok.isOneOf(tok::star, tok::amp)) + Info.HasPointerOrRef = true; + + // Treat only identifiers and a small set of typedef-relevant keywords as + // "identifier-like" to avoid over-reporting on unrelated tokens. + if (tok::isAnyIdentifier(Tok.getKind()) || + Tok.isOneOf(tok::kw_typedef, tok::kw_struct, tok::kw_class, + tok::kw_union, tok::kw_enum, tok::kw_typename, + tok::kw_template)) + Info.HasIdentifier = true; + } + + return Info; +} + std::optional<Token> getQualifyingToken(tok::TokenKind TK, CharSourceRange Range, const ASTContext &Context, @@ -124,11 +265,11 @@ std::optional<Token> getQualifyingToken(tok::TokenKind TK, Tok.setIdentifierInfo(&Info); Tok.setKind(Info.getTokenID()); } - if (Tok.is(tok::less)) + if (Tok.is(tok::less)) { SawTemplate = true; - else if (Tok.isOneOf(tok::greater, tok::greatergreater)) + } else if (Tok.isOneOf(tok::greater, tok::greatergreater)) { LastMatchAfterTemplate = std::nullopt; - else if (Tok.is(TK)) { + } else if (Tok.is(TK)) { if (SawTemplate) LastMatchAfterTemplate = Tok; else diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.h b/clang-tools-extra/clang-tidy/utils/LexerUtils.h index 61389d86f22f4..cd0c6535ffffd 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.h +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.h @@ -13,7 +13,9 @@ #include "clang/Basic/TokenKinds.h" #include "clang/Lex/Lexer.h" #include <optional> +#include <string> #include <utility> +#include <vector> namespace clang { @@ -21,6 +23,18 @@ class Stmt; namespace tidy::utils::lexer { +struct TokenRangeInfo { + bool HasComment = false; + bool HasIdentifier = false; + bool HasPointerOrRef = false; +}; + +// Represents a comment token and its source location in the original file. +struct CommentToken { + SourceLocation Loc; + StringRef Text; +}; + /// Returns previous token or ``std::nullopt`` if not found. std::optional<Token> getPreviousToken(SourceLocation Location, const SourceManager &SM, @@ -113,6 +127,20 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts); +/// Returns comment tokens found in the given range. If a non-comment token is +/// encountered, clears previously collected comments and continues. +std::vector<CommentToken> getCommentsInRange(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts); + +/// Returns the source text for the given range or an empty string on failure. +std::string getSourceText(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts); + +/// Returns basic token information for the given range. +TokenRangeInfo analyzeTokenRange(CharSourceRange Range, const SourceManager &SM, + const LangOptions &LangOpts); + /// Assuming that ``Range`` spans a CVR-qualified type, returns the /// token in ``Range`` that is responsible for the qualification. ``Range`` /// must be valid with respect to ``SM``. Returns ``std::nullopt`` if no diff --git a/clang-tools-extra/docs/ReleaseNotes.rst b/clang-tools-extra/docs/ReleaseNotes.rst index 0ad69f5fdc5aa..ca3d8ed977bc3 100644 --- a/clang-tools-extra/docs/ReleaseNotes.rst +++ b/clang-tools-extra/docs/ReleaseNotes.rst @@ -181,7 +181,8 @@ Changes in existing checks - Improved :doc:`modernize-use-using <clang-tidy/checks/modernize/use-using>` check by avoiding the generation - of invalid code for function types with redundant parentheses. + of invalid code for function types with redundant parentheses and preserving + inline comment blocks that appear between the typedef's parts. - Improved :doc:`performance-enum-size <clang-tidy/checks/performance/enum-size>` check: diff --git a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-using.cpp b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-using.cpp index 324616d274646..1e979a0be7b49 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/modernize/use-using.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/modernize/use-using.cpp @@ -1,4 +1,4 @@ -// RUN: %check_clang_tidy %s modernize-use-using %t -- -- -fno-delayed-template-parsing -isystem %S/Inputs/use-using/ + // RUN: %check_clang_tidy %s modernize-use-using %t -- -- -fno-delayed-template-parsing -isystem %S/Inputs/use-using/ typedef int Type; // CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' [modernize-use-using] @@ -487,3 +487,70 @@ namespace GH176267 { // CHECK-MESSAGES: :[[@LINE-1]]:3: warning: use 'using' instead of 'typedef' [modernize-use-using] // CHECK-FIXES: using f6 = int (double); } + +namespace GH159518 { +typedef int // start and end chunks for cells in a line + Commented; // (end is chunk beyond end of line) +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using Commented = int // start and end chunks for cells in a line +// CHECK-FIXES-NEXT: ; // (end is chunk beyond end of line) + +typedef /*prefix*/ int PrefixCommented; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using PrefixCommented = /*prefix*/ int; + +typedef const /*qual*/ int QualCommented; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using QualCommented = const /*qual*/ int; + +typedef int /*between*/ BetweenCommented; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using BetweenCommented = int /*between*/; + +typedef int /*multi-line +comment*/ MultiLineCommented; +// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using MultiLineCommented = int /*multi-line +// CHECK-FIXES-NEXT: comment*/; + +typedef int // line comment 1 +// line comment 2 +// line comment 3 + MultiLineSlashCommented; +// CHECK-MESSAGES: :[[@LINE-4]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using MultiLineSlashCommented = int // line comment 1 +// CHECK-FIXES-NEXT: // line comment 2 +// CHECK-FIXES-NEXT: // line comment 3 +// CHECK-FIXES-NEXT: ; + +typedef int * /*ptr*/ PtrCommented; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using PtrCommented = int * /*ptr*/; + +typedef int AfterNameCommented /*after*/; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using AfterNameCommented = int /*after*/; + +typedef int TrailingCommented; // trailing +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using TrailingCommented = int; // trailing + +typedef int MultiA, /*between comma*/ *MultiB; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-MESSAGES: :[[@LINE-2]]:{{[0-9]+}}: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using MultiA = int; +// CHECK-FIXES-NEXT: using MultiB = MultiA /*between comma*/ *; + +struct TagCommented; +typedef struct /*tag*/ TagCommented TagCommentedAlias; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using TagCommentedAlias = struct /*tag*/ TagCommented; + +typedef int (* /*fp*/ FuncPtrCommented)(int); +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using FuncPtrCommented = int (* /*fp*/ )(int); + +typedef TwoArgTemplate</*tmpl*/ int, int> TemplateArgCommented; +// CHECK-MESSAGES: :[[@LINE-1]]:1: warning: use 'using' instead of 'typedef' +// CHECK-FIXES: using TemplateArgCommented = TwoArgTemplate</*tmpl*/ int, int>; +} // namespace GH159518 diff --git a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt index 64bf47e61736c..167f5d3def06b 100644 --- a/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-tidy/CMakeLists.txt @@ -24,6 +24,7 @@ add_extra_unittest(ClangTidyTests DeclRefExprUtilsTest.cpp IncludeCleanerTest.cpp IncludeInserterTest.cpp + LexerUtilsTest.cpp GlobListTest.cpp GoogleModuleTest.cpp LLVMModuleTest.cpp diff --git a/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp new file mode 100644 index 0000000000000..653db0ff4d790 --- /dev/null +++ b/clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp @@ -0,0 +1,311 @@ +//===--- LexerUtilsTest.cpp - clang-tidy ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "../clang-tidy/utils/LexerUtils.h" + +#include "clang/Basic/FileManager.h" +#include "clang/Basic/SourceManager.h" +#include "clang/Frontend/ASTUnit.h" +#include "clang/Serialization/PCHContainerOperations.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/Support/Error.h" +#include "llvm/Testing/Annotations/Annotations.h" +#include "gtest/gtest.h" + +namespace clang { +namespace tidy { +namespace test { + +using clang::tooling::FileContentMappings; + +static std::unique_ptr<ASTUnit> +buildAST(StringRef Code, const FileContentMappings &Mappings = {}) { + std::vector<std::string> Args = {"-std=c++20"}; + return clang::tooling::buildASTFromCodeWithArgs( + Code, Args, "input.cc", "clang-tool", + std::make_shared<PCHContainerOperations>(), + clang::tooling::getClangStripDependencyFileAdjuster(), Mappings); +} + +static CharSourceRange rangeFromAnnotations(const llvm::Annotations &A, + const SourceManager &SM, FileID FID, + llvm::StringRef Name = "") { + const auto R = A.range(Name); + const SourceLocation Begin = + SM.getLocForStartOfFile(FID).getLocWithOffset(R.Begin); + const SourceLocation End = + SM.getLocForStartOfFile(FID).getLocWithOffset(R.End); + return CharSourceRange::getCharRange(Begin, End); +} + +namespace { + +TEST(LexerUtilsTest, GetSourceText) { + llvm::Annotations Code(R"cpp( +int main() { + [[int value = 42;]] +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + ASTContext &Context = AST->getASTContext(); + SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID()); + + EXPECT_EQ("int value = 42;", + utils::lexer::getSourceText(Range, SM, LangOpts)); +} + +TEST(LexerUtilsTest, GetSourceTextInvalidRange) { + std::unique_ptr<ASTUnit> AST = buildAST("int value = 0;"); + ASSERT_TRUE(AST); + ASTContext &Context = AST->getASTContext(); + SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + EXPECT_TRUE( + utils::lexer::getSourceText(CharSourceRange(), SM, LangOpts).empty()); +} + +TEST(LexerUtilsTest, GetSourceTextCrossFileRange) { + const char *Code = R"cpp( +#include "header.h" +int main() { return value; } +)cpp"; + FileContentMappings Mappings = {{"header.h", "int value;\n"}}; + std::unique_ptr<ASTUnit> AST = buildAST(Code, Mappings); + ASSERT_TRUE(AST); + + ASTContext &Context = AST->getASTContext(); + SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const SourceLocation MainBegin = SM.getLocForStartOfFile(SM.getMainFileID()); + llvm::Expected<FileEntryRef> HeaderRef = + SM.getFileManager().getFileRef("header.h"); + ASSERT_TRUE(static_cast<bool>(HeaderRef)); + const FileID HeaderID = SM.getOrCreateFileID(*HeaderRef, SrcMgr::C_User); + const SourceLocation HeaderBegin = SM.getLocForStartOfFile(HeaderID); + ASSERT_TRUE(HeaderBegin.isValid()); + + const CharSourceRange CrossRange = + CharSourceRange::getCharRange(MainBegin, HeaderBegin); + EXPECT_TRUE(utils::lexer::getSourceText(CrossRange, SM, LangOpts).empty()); +} + +TEST(LexerUtilsTest, AnalyzeTokenRangeInvalidRange) { + std::unique_ptr<ASTUnit> AST = buildAST("int value = 0;"); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const utils::lexer::TokenRangeInfo Info = + utils::lexer::analyzeTokenRange(CharSourceRange(), SM, LangOpts); + EXPECT_FALSE(Info.HasComment); + EXPECT_FALSE(Info.HasIdentifier); + EXPECT_FALSE(Info.HasPointerOrRef); +} + +TEST(LexerUtilsTest, AnalyzeTokenRangeCommentOnly) { + llvm::Annotations Code(R"cpp( +void f() { + [[/*comment*/]] +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID()); + const utils::lexer::TokenRangeInfo Info = + utils::lexer::analyzeTokenRange(Range, SM, LangOpts); + EXPECT_TRUE(Info.HasComment); + EXPECT_FALSE(Info.HasIdentifier); + EXPECT_FALSE(Info.HasPointerOrRef); +} + +TEST(LexerUtilsTest, AnalyzeTokenRangePointerAndReference) { + llvm::Annotations Code(R"cpp( +void f() { + int $ptr[[*]]Ptr; + int $ref[[&]]Ref = *Ptr; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange PtrRange = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "ptr"); + const utils::lexer::TokenRangeInfo PtrInfo = + utils::lexer::analyzeTokenRange(PtrRange, SM, LangOpts); + EXPECT_TRUE(PtrInfo.HasPointerOrRef); + EXPECT_FALSE(PtrInfo.HasIdentifier); + EXPECT_FALSE(PtrInfo.HasComment); + + const CharSourceRange RefRange = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "ref"); + const utils::lexer::TokenRangeInfo RefInfo = + utils::lexer::analyzeTokenRange(RefRange, SM, LangOpts); + EXPECT_TRUE(RefInfo.HasPointerOrRef); + EXPECT_FALSE(RefInfo.HasIdentifier); + EXPECT_FALSE(RefInfo.HasComment); +} + +TEST(LexerUtilsTest, AnalyzeTokenRangeIdentifier) { + llvm::Annotations Code(R"cpp( +void f() { + int $id[[Name]] = 0; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "id"); + const utils::lexer::TokenRangeInfo Info = + utils::lexer::analyzeTokenRange(Range, SM, LangOpts); + EXPECT_FALSE(Info.HasComment); + EXPECT_TRUE(Info.HasIdentifier); + EXPECT_FALSE(Info.HasPointerOrRef); +} + +TEST(LexerUtilsTest, AnalyzeTokenRangeIdentifierKeyword) { + llvm::Annotations Code(R"cpp( +$kw[[struct]] S {}; +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "kw"); + const utils::lexer::TokenRangeInfo Info = + utils::lexer::analyzeTokenRange(Range, SM, LangOpts); + EXPECT_FALSE(Info.HasComment); + EXPECT_TRUE(Info.HasIdentifier); + EXPECT_FALSE(Info.HasPointerOrRef); +} + +TEST(LexerUtilsTest, AnalyzeTokenRangeLogicalAnd) { + llvm::Annotations Code(R"cpp( +void f(bool a, bool b) { + bool c = a $and[[&&]] b; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "and"); + const utils::lexer::TokenRangeInfo Info = + utils::lexer::analyzeTokenRange(Range, SM, LangOpts); + EXPECT_FALSE(Info.HasComment); + EXPECT_FALSE(Info.HasIdentifier); + EXPECT_FALSE(Info.HasPointerOrRef); +} + +TEST(LexerUtilsTest, GetCommentsInRangeAdjacentComments) { + llvm::Annotations Code(R"cpp( +void f() { + $range[[/*first*/ /*second*/]] + int x = 0; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getCommentsInRange(Range, SM, LangOpts); + ASSERT_EQ(2u, Comments.size()); + EXPECT_EQ("/*first*/", Comments[0].Text); + EXPECT_EQ("/*second*/", Comments[1].Text); +} + +TEST(LexerUtilsTest, GetCommentsInRangeClearsOnToken) { + llvm::Annotations Code(R"cpp( +void f() { + int x = ($range[[/*first*/ 0, /*second*/]] 1); +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getCommentsInRange(Range, SM, LangOpts); + ASSERT_EQ(1u, Comments.size()); + EXPECT_EQ("/*second*/", Comments.front().Text); +} + +TEST(LexerUtilsTest, GetCommentsInRangeLineComments) { + llvm::Annotations Code(R"cpp( +void f() { + $range[[// first + // second + ]] + int x = 0; +} +)cpp"); + std::unique_ptr<ASTUnit> AST = buildAST(Code.code()); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const CharSourceRange Range = + rangeFromAnnotations(Code, SM, SM.getMainFileID(), "range"); + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getCommentsInRange(Range, SM, LangOpts); + ASSERT_EQ(2u, Comments.size()); + EXPECT_EQ("// first", Comments[0].Text); + EXPECT_EQ("// second", Comments[1].Text); +} + +TEST(LexerUtilsTest, GetCommentsInRangeInvalidRange) { + std::unique_ptr<ASTUnit> AST = buildAST("int value = 0;"); + ASSERT_TRUE(AST); + const ASTContext &Context = AST->getASTContext(); + const SourceManager &SM = Context.getSourceManager(); + const LangOptions &LangOpts = Context.getLangOpts(); + + const std::vector<utils::lexer::CommentToken> Comments = + utils::lexer::getCommentsInRange(CharSourceRange(), SM, LangOpts); + EXPECT_TRUE(Comments.empty()); +} + +} // namespace + +} // namespace test +} // namespace tidy +} // namespace clang _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
