llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang-tools-extra @llvm/pr-subscribers-clang-tidy Author: Daniil Dudkin (unterumarmung) <details> <summary>Changes</summary> Keep comment blocks between typedef type and name by analyzing the raw lexer range, and avoid injecting unrelated tokens into the replacement. Reuse comment-trivia handling from `bugprone-argument-comment` and add unit tests for selected `LexerUtils` functions while extending `modernize-use-using` regression coverage. Fixes https://github.com/llvm/llvm-project/issues/159518. --- Patch is 41.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/180366.diff 8 Files Affected: - (modified) clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp (+13-57) - (modified) clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp (+191-48) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.cpp (+143-3) - (modified) clang-tools-extra/clang-tidy/utils/LexerUtils.h (+28) - (modified) clang-tools-extra/docs/ReleaseNotes.rst (+4) - (modified) clang-tools-extra/test/clang-tidy/checkers/modernize/use-using.cpp (+68-1) - (modified) clang-tools-extra/unittests/clang-tidy/CMakeLists.txt (+1) - (added) clang-tools-extra/unittests/clang-tidy/LexerUtilsTest.cpp (+312) ``````````diff diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index d46896808bd09..a94daf4d15357 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -77,55 +77,9 @@ void ArgumentCommentCheck::registerMatchers(MatchFinder *Finder) { this); } -static std::vector<std::pair<SourceLocation, StringRef>> -getCommentsInRange(ASTContext *Ctx, CharSourceRange Range) { - std::vector<std::pair<SourceLocation, StringRef>> Comments; - auto &SM = Ctx->getSourceManager(); - const std::pair<FileID, unsigned> BeginLoc = - SM.getDecomposedLoc(Range.getBegin()), - EndLoc = - SM.getDecomposedLoc(Range.getEnd()); - - if (BeginLoc.first != EndLoc.first) - return Comments; - - bool Invalid = false; - const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); - if (Invalid) - return Comments; - - const char *StrData = Buffer.data() + BeginLoc.second; - - Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), Ctx->getLangOpts(), - Buffer.begin(), StrData, Buffer.end()); - TheLexer.SetCommentRetentionState(true); - - while (true) { - Token Tok; - if (TheLexer.LexFromRawLexer(Tok)) - break; - if (Tok.getLocation() == Range.getEnd() || Tok.is(tok::eof)) - break; - - if (Tok.is(tok::comment)) { - const std::pair<FileID, unsigned> CommentLoc = - SM.getDecomposedLoc(Tok.getLocation()); - assert(CommentLoc.first == BeginLoc.first); - Comments.emplace_back( - Tok.getLocation(), - StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength())); - } else { - // Clear comments found before the different token, e.g. comma. - Comments.clear(); - } - } - - return Comments; -} - -static std::vector<std::pair<SourceLocation, StringRef>> +static std::vector<utils::lexer::CommentToken> getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) { - std::vector<std::pair<SourceLocation, StringRef>> Comments; + std::vector<utils::lexer::CommentToken> Comments; while (Loc.isValid()) { const std::optional<Token> Tok = utils::lexer::getPreviousToken( Loc, Ctx->getSourceManager(), Ctx->getLangOpts(), @@ -133,11 +87,12 @@ getCommentsBeforeLoc(ASTContext *Ctx, SourceLocation Loc) { if (!Tok || Tok->isNot(tok::comment)) break; Loc = Tok->getLocation(); - Comments.emplace_back( + Comments.emplace_back(utils::lexer::CommentToken{ Loc, Lexer::getSourceText(CharSourceRange::getCharRange( Loc, Loc.getLocWithOffset(Tok->getLength())), - Ctx->getSourceManager(), Ctx->getLangOpts())); + Ctx->getSourceManager(), Ctx->getLangOpts()), + }); } return Comments; } @@ -304,9 +259,10 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx, MakeFileCharRange(ArgBeginLoc, Args[I]->getBeginLoc()); ArgBeginLoc = Args[I]->getEndLoc(); - std::vector<std::pair<SourceLocation, StringRef>> Comments; + std::vector<utils::lexer::CommentToken> Comments; if (BeforeArgument.isValid()) { - Comments = getCommentsInRange(Ctx, BeforeArgument); + Comments = utils::lexer::getCommentsInRange( + BeforeArgument, Ctx->getSourceManager(), Ctx->getLangOpts()); } else { // Fall back to parsing back from the start of the argument. const CharSourceRange ArgsRange = @@ -314,18 +270,18 @@ void ArgumentCommentCheck::checkCallArgs(ASTContext *Ctx, Comments = getCommentsBeforeLoc(Ctx, ArgsRange.getBegin()); } - for (auto Comment : Comments) { + for (const auto &Comment : Comments) { llvm::SmallVector<StringRef, 2> Matches; - if (IdentRE.match(Comment.second, &Matches) && + if (IdentRE.match(Comment.Text, &Matches) && !sameName(Matches[2], II->getName(), StrictMode)) { { const DiagnosticBuilder Diag = - diag(Comment.first, "argument name '%0' in comment does not " - "match parameter name %1") + diag(Comment.Loc, "argument name '%0' in comment does not " + "match parameter name %1") << Matches[2] << II; if (isLikelyTypo(Callee->parameters(), Matches[2], II->getName())) { Diag << FixItHint::CreateReplacement( - Comment.first, (Matches[1] + II->getName() + Matches[3]).str()); + Comment.Loc, (Matches[1] + II->getName() + Matches[3]).str()); } } diag(PVD->getLocation(), "%0 declared here", DiagnosticIDs::Note) << II; diff --git a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp index 4d9b5ee568d80..cc467407649b2 100644 --- a/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/UseUsingCheck.cpp @@ -26,6 +26,129 @@ AST_MATCHER(clang::LinkageSpecDecl, isExternCLinkage) { namespace clang::tidy::modernize { +namespace lexer = clang::tidy::utils::lexer; + +namespace { +struct RangeTextInfo { + std::string Text; + lexer::TokenRangeInfo Tokens; +}; +} // namespace + +static bool hasNonWhitespace(llvm::StringRef Text) { + return Text.find_first_not_of(" \t\n\r\f\v") != llvm::StringRef::npos; +} + +static RangeTextInfo getRangeTextInfo(clang::SourceLocation Begin, + clang::SourceLocation End, + const clang::SourceManager &SM, + const clang::LangOptions &LangOpts) { + RangeTextInfo Info; + if (!Begin.isValid() || !End.isValid() || Begin.isMacroID() || + End.isMacroID()) + return Info; + + const clang::CharSourceRange Range = + clang::CharSourceRange::getCharRange(Begin, End); + Info.Text = lexer::getSourceText(Range, SM, LangOpts); + Info.Tokens = lexer::analyzeTokenRange(Range, SM, LangOpts); + return Info; +} + +static std::string getFunctionPointerTypeText(clang::SourceRange TypeRange, + clang::SourceLocation NameLoc, + const clang::SourceManager &SM, + const clang::LangOptions &LO) { + clang::SourceLocation StartLoc = NameLoc; + clang::SourceLocation EndLoc = NameLoc; + + while (true) { + const std::optional<clang::Token> Prev = + lexer::getPreviousToken(StartLoc, SM, LO); + const std::optional<clang::Token> Next = + lexer::findNextTokenSkippingComments(EndLoc, SM, LO); + if (!Prev || Prev->isNot(clang::tok::l_paren) || !Next || + Next->isNot(clang::tok::r_paren)) + break; + + StartLoc = Prev->getLocation(); + EndLoc = Next->getLocation(); + } + + const clang::CharSourceRange RangeLeftOfIdentifier = + clang::CharSourceRange::getCharRange(TypeRange.getBegin(), StartLoc); + const clang::CharSourceRange RangeRightOfIdentifier = + clang::CharSourceRange::getCharRange( + clang::Lexer::getLocForEndOfToken(EndLoc, 0, SM, LO), + clang::Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO)); + return lexer::getSourceText(RangeLeftOfIdentifier, SM, LO) + + lexer::getSourceText(RangeRightOfIdentifier, SM, LO); +} + +static RangeTextInfo getLeadingTextInfo(bool IsFirstTypedefInGroup, + clang::SourceRange ReplaceRange, + clang::SourceRange TypeRange, + const clang::SourceManager &SM, + const clang::LangOptions &LO) { + RangeTextInfo Info; + if (!IsFirstTypedefInGroup) + return Info; + + const clang::SourceLocation TypedefEnd = + clang::Lexer::getLocForEndOfToken(ReplaceRange.getBegin(), 0, SM, LO); + Info = getRangeTextInfo(TypedefEnd, TypeRange.getBegin(), SM, LO); + // Keep leading trivia only when it actually contains comments. This avoids + // shifting plain whitespace from between 'typedef' and the type into the + // replacement, preserving formatting for un-commented declarations. + if (!Info.Tokens.HasComment) + Info.Text.clear(); + return Info; +} + +static RangeTextInfo getSuffixTextInfo(bool FunctionPointerCase, + bool IsFirstTypedefInGroup, + clang::SourceLocation PrevReplacementEnd, + clang::SourceRange TypeRange, + clang::SourceLocation NameLoc, + const clang::SourceManager &SM, + const clang::LangOptions &LO) { + RangeTextInfo Info; + if (FunctionPointerCase) + return Info; + + // Capture the raw text between type and name to preserve trailing comments, + // including multi-line // blocks, without re-lexing individual comment + // tokens. + if (IsFirstTypedefInGroup) { + const clang::SourceLocation AfterType = + clang::Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO); + return getRangeTextInfo(AfterType, NameLoc, SM, LO); + } + + if (!PrevReplacementEnd.isValid() || PrevReplacementEnd.isMacroID()) + return Info; + + clang::SourceLocation AfterComma = PrevReplacementEnd; + if (const std::optional<clang::Token> NextTok = + lexer::findNextTokenSkippingComments(AfterComma, SM, LO)) { + if (NextTok->is(clang::tok::comma)) { + AfterComma = + clang::Lexer::getLocForEndOfToken(NextTok->getLocation(), 0, SM, LO); + } + } + return getRangeTextInfo(AfterComma, NameLoc, SM, LO); +} + +static void stripLeadingComma(RangeTextInfo &Info) { + if (Info.Text.empty()) + return; + // Overlapping ranges in multi-declarator typedefs can leave a leading comma + // in the captured suffix. Drop it so the replacement doesn't reintroduce it. + const size_t NonWs = Info.Text.find_first_not_of(" \t\n\r\f\v"); + if (NonWs != std::string::npos && Info.Text[NonWs] == ',') + Info.Text.erase(0, NonWs + 1); +} + static constexpr StringRef ExternCDeclName = "extern-c-decl"; static constexpr StringRef ParentDeclName = "parent-decl"; static constexpr StringRef TagDeclName = "tag-decl"; @@ -130,69 +253,59 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { const TypeLoc TL = MatchedDecl->getTypeSourceInfo()->getTypeLoc(); - bool FunctionPointerCase = false; - auto [Type, QualifierStr] = [MatchedDecl, this, &TL, &FunctionPointerCase, - &SM, - &LO]() -> std::pair<std::string, std::string> { - SourceRange TypeRange = TL.getSourceRange(); + struct TypeInfo { + SourceRange Range; + bool FunctionPointerCase = false; + std::string Type; + std::string Qualifier; + }; + + const TypeInfo TI = [&] { + TypeInfo Info; + Info.Range = TL.getSourceRange(); // Function pointer case, get the left and right side of the identifier // without the identifier. - if (TypeRange.fullyContains(MatchedDecl->getLocation())) { - FunctionPointerCase = true; - SourceLocation StartLoc = MatchedDecl->getLocation(); - SourceLocation EndLoc = MatchedDecl->getLocation(); - - while (true) { - const std::optional<Token> Prev = - utils::lexer::getPreviousToken(StartLoc, SM, LO); - const std::optional<Token> Next = - utils::lexer::findNextTokenSkippingComments(EndLoc, SM, LO); - if (!Prev || Prev->isNot(tok::l_paren) || !Next || - Next->isNot(tok::r_paren)) - break; - - StartLoc = Prev->getLocation(); - EndLoc = Next->getLocation(); - } - - const auto RangeLeftOfIdentifier = - CharSourceRange::getCharRange(TypeRange.getBegin(), StartLoc); - const auto RangeRightOfIdentifier = CharSourceRange::getCharRange( - Lexer::getLocForEndOfToken(EndLoc, 0, SM, LO), - Lexer::getLocForEndOfToken(TypeRange.getEnd(), 0, SM, LO)); - const std::string VerbatimType = - (Lexer::getSourceText(RangeLeftOfIdentifier, SM, LO) + - Lexer::getSourceText(RangeRightOfIdentifier, SM, LO)) - .str(); - return {VerbatimType, ""}; + if (Info.Range.fullyContains(MatchedDecl->getLocation())) { + Info.FunctionPointerCase = true; + Info.Type = getFunctionPointerTypeText( + Info.Range, MatchedDecl->getLocation(), SM, LO); + return Info; } - StringRef ExtraReference = ""; - if (MainTypeEndLoc.isValid() && TypeRange.fullyContains(MainTypeEndLoc)) { + std::string ExtraReference; + if (MainTypeEndLoc.isValid() && Info.Range.fullyContains(MainTypeEndLoc)) { // Each type introduced in a typedef can specify being a reference or // pointer type separately, so we need to figure out if the new using-decl // needs to be to a reference or pointer as well. - const SourceLocation Tok = utils::lexer::findPreviousAnyTokenKind( + const SourceLocation Tok = lexer::findPreviousAnyTokenKind( MatchedDecl->getLocation(), SM, LO, tok::TokenKind::star, tok::TokenKind::amp, tok::TokenKind::comma, tok::TokenKind::kw_typedef); - ExtraReference = Lexer::getSourceText( + ExtraReference = lexer::getSourceText( CharSourceRange::getCharRange(Tok, Tok.getLocWithOffset(1)), SM, LO); if (ExtraReference != "*" && ExtraReference != "&") ExtraReference = ""; - TypeRange.setEnd(MainTypeEndLoc); + Info.Range.setEnd(MainTypeEndLoc); } - return { - Lexer::getSourceText(CharSourceRange::getTokenRange(TypeRange), SM, LO) - .str(), - ExtraReference.str()}; + + Info.Type = lexer::getSourceText(CharSourceRange::getTokenRange(Info.Range), + SM, LO); + Info.Qualifier = ExtraReference; + return Info; }(); + + const SourceRange TypeRange = TI.Range; + const bool FunctionPointerCase = TI.FunctionPointerCase; + std::string Type = TI.Type; + const std::string QualifierStr = TI.Qualifier; const StringRef Name = MatchedDecl->getName(); + const SourceLocation NameLoc = MatchedDecl->getLocation(); SourceRange ReplaceRange = MatchedDecl->getSourceRange(); + const SourceLocation PrevReplacementEnd = LastReplacementEnd; // typedefs with multiple comma-separated definitions produce multiple // consecutive TypedefDecl nodes whose SourceRanges overlap. Each range starts @@ -201,10 +314,13 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { // But also we need to check that the ranges belong to the same file because // different files may contain overlapping ranges. std::string Using = "using "; - if (ReplaceRange.getBegin().isMacroID() || + const bool IsFirstTypedefInGroup = + ReplaceRange.getBegin().isMacroID() || (Result.SourceManager->getFileID(ReplaceRange.getBegin()) != Result.SourceManager->getFileID(LastReplacementEnd)) || - (ReplaceRange.getBegin() >= LastReplacementEnd)) { + (ReplaceRange.getBegin() >= LastReplacementEnd); + + if (IsFirstTypedefInGroup) { // This is the first (and possibly the only) TypedefDecl in a typedef. Save // Type and Name in case we find subsequent TypedefDecl's in this typedef. FirstTypedefType = Type; @@ -223,6 +339,27 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { Type = FirstTypedefName; } + const RangeTextInfo LeadingTextInfo = getLeadingTextInfo( + IsFirstTypedefInGroup, ReplaceRange, TypeRange, SM, LO); + RangeTextInfo SuffixTextInfo = + getSuffixTextInfo(FunctionPointerCase, IsFirstTypedefInGroup, + PrevReplacementEnd, TypeRange, NameLoc, SM, LO); + if (!IsFirstTypedefInGroup) + stripLeadingComma(SuffixTextInfo); + + const bool SuffixHasComment = SuffixTextInfo.Tokens.HasComment; + std::string SuffixText; + if (SuffixHasComment) { + SuffixText = SuffixTextInfo.Text; + } else if (QualifierStr.empty() && hasNonWhitespace(SuffixTextInfo.Text) && + SuffixTextInfo.Tokens.HasPointerOrRef && + !SuffixTextInfo.Tokens.HasIdentifier) { + // Only keep non-comment suffix text when it's purely pointer/ref trivia. + // This avoids accidentally pulling in keywords like 'typedef'. + SuffixText = SuffixTextInfo.Text; + } + const std::string QualifierText = SuffixHasComment ? "" : QualifierStr; + if (!ReplaceRange.getEnd().isMacroID()) { const SourceLocation::IntTy Offset = FunctionPointerCase ? 0 : Name.size(); LastReplacementEnd = ReplaceRange.getEnd().getLocWithOffset(Offset); @@ -235,14 +372,20 @@ void UseUsingCheck::check(const MatchFinder::MatchResult &Result) { if (LastTagDeclRange != LastTagDeclRanges.end() && LastTagDeclRange->second.isValid() && ReplaceRange.fullyContains(LastTagDeclRange->second)) { - Type = std::string(Lexer::getSourceText( - CharSourceRange::getTokenRange(LastTagDeclRange->second), SM, LO)); + Type = lexer::getSourceText( + CharSourceRange::getTokenRange(LastTagDeclRange->second), SM, LO); if (Type.empty()) return; } - const std::string Replacement = - (Using + Name + " = " + Type + QualifierStr).str(); + std::string TypeExpr = + LeadingTextInfo.Text + Type + QualifierText + SuffixText; + TypeExpr = StringRef(TypeExpr).rtrim(" \t").str(); + StringRef Assign = " = "; + if (!TypeExpr.empty() && + (TypeExpr.front() == ' ' || TypeExpr.front() == '\t')) + Assign = " ="; + const std::string Replacement = (Using + Name + Assign + TypeExpr).str(); Diag << FixItHint::CreateReplacement(ReplaceRange, Replacement); } } // namespace clang::tidy::modernize diff --git a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp index b77d985b76d77..d5ed776642f3f 100644 --- a/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp +++ b/clang-tools-extra/clang-tidy/utils/LexerUtils.cpp @@ -10,6 +10,7 @@ #include "clang/Basic/SourceManager.h" #include <optional> #include <utility> +#include <vector> namespace clang::tidy::utils::lexer { @@ -99,6 +100,145 @@ bool rangeContainsExpansionsOrDirectives(SourceRange Range, return false; } +std::vector<CommentToken> getCommentsInRange(CharSourceRange Range, + const SourceManager &SM, + const LangOptions &LangOpts) { + std::vector<CommentToken> Comments; + if (!Range.isValid()) + return Comments; + + const CharSourceRange FileRange = + Lexer::makeFileCharRange(Range, SM, LangOpts); + if (!FileRange.isValid()) + return Comments; + + const std::pair<FileID, unsigned> BeginLoc = + SM.getDecomposedLoc(FileRange.getBegin()); + const std::pair<FileID, unsigned> EndLoc = + SM.getDecomposedLoc(FileRange.getEnd()); + + if (BeginLoc.first != EndLoc.first) + return Comments; + + bool Invalid = false; + const StringRef Buffer = SM.getBufferData(BeginLoc.first, &Invalid); + if (Invalid) + return Comments; + + const char *StrData = Buffer.data() + BeginLoc.second; + + Lexer TheLexer(SM.getLocForStartOfFile(BeginLoc.first), LangOpts, + Buffer.begin(), StrData, Buffer.end()); + // Use raw lexing with comment retention so we can see comment tokens without + // preprocessing or macro expansion effects. + TheLexer.SetCommentRetentionState(true); + + while (true) { + Token Tok; + if (TheLexer.LexFromRawLexer(Tok)) + break; + if (Tok.getLocation() == FileRange.getEnd() || Tok.is(tok::eof)) + break; + + if (Tok.is(tok::comment)) { + const std::pair<FileID, unsigned> CommentLoc = + SM.getDecomposedLoc(Tok.getLocation()); + assert(CommentLoc.first == BeginLoc.first); + Comments.push_back({ + Tok.getLocation(), + StringRef(Buffer.begin() + CommentLoc.second, Tok.getLength()), + }); + } else { + // Clear comments found before the different token, e.g. comma.... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/180366 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
