curdeius created this revision. curdeius added reviewers: MyDeveloperDay, HazardyKnusperkeks, owenpan, ksyx. Herald added a project: All. curdeius requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
Fixes https://github.com/llvm/llvm-project/issues/54522. This fixes regression introduced in https://github.com/llvm/llvm-project/commit/5e5efd8a91f2e340e79a73bedbc6ab66ad4a4281. Before the culprit commit, macros in WhitespaceSensitiveMacros were correctly formatted even if their closing parenthesis weren't followed by semicolon (or, to be precise, when they were followed by a newline). That commit changed the type of the macro token type from TT_UntouchableMacroFunc to TT_FunctionLikeOrFreestandingMacro. Correct formatting (with `WhitespaceSensitiveMacros = ['FOO']`): FOO(1+2) FOO(1+2); Regressed formatting: FOO(1 + 2) FOO(1+2); Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D123676 Files: clang/lib/Format/FormatTokenLexer.cpp clang/lib/Format/FormatTokenLexer.h clang/lib/Format/UnwrappedLineParser.cpp clang/unittests/Format/FormatTest.cpp
Index: clang/unittests/Format/FormatTest.cpp =================================================================== --- clang/unittests/Format/FormatTest.cpp +++ clang/unittests/Format/FormatTest.cpp @@ -23540,6 +23540,11 @@ // Don't use the helpers here, since 'mess up' will change the whitespace // and these are all whitespace sensitive by definition + + // Newlines are important here. + EXPECT_EQ("FOO(1+2 );\n", format("FOO(1+2 );\n", Style)); + EXPECT_EQ("FOO(1+2 )\n", format("FOO(1+2 )\n", Style)); + EXPECT_EQ("FOO(String-ized&Messy+But(: :Still)=Intentional);", format("FOO(String-ized&Messy+But(: :Still)=Intentional);", Style)); EXPECT_EQ( Index: clang/lib/Format/UnwrappedLineParser.cpp =================================================================== --- clang/lib/Format/UnwrappedLineParser.cpp +++ clang/lib/Format/UnwrappedLineParser.cpp @@ -1787,7 +1787,8 @@ : CommentsBeforeNextToken.front()->NewlinesBefore > 0; if (FollowedByNewline && (Text.size() >= 5 || FunctionLike) && - tokenCanStartNewLine(*FormatTok) && Text == Text.upper()) { + tokenCanStartNewLine(*FormatTok) && Text == Text.upper() && + !PreviousToken->isTypeFinalized()) { PreviousToken->setFinalizedType(TT_FunctionLikeOrFreestandingMacro); addUnwrappedLine(); return; Index: clang/lib/Format/FormatTokenLexer.h =================================================================== --- clang/lib/Format/FormatTokenLexer.h +++ clang/lib/Format/FormatTokenLexer.h @@ -114,7 +114,12 @@ unsigned FirstInLineIndex; SmallVector<FormatToken *, 16> Tokens; - llvm::SmallMapVector<IdentifierInfo *, TokenType, 8> Macros; + struct MacroTokenInfo { + TokenType Type; + bool Finalized; + }; + + llvm::SmallMapVector<IdentifierInfo *, MacroTokenInfo, 8> Macros; bool FormattingDisabled; Index: clang/lib/Format/FormatTokenLexer.cpp =================================================================== --- clang/lib/Format/FormatTokenLexer.cpp +++ clang/lib/Format/FormatTokenLexer.cpp @@ -39,37 +39,38 @@ for (const std::string &ForEachMacro : Style.ForEachMacros) { auto Identifier = &IdentTable.get(ForEachMacro); - Macros.insert({Identifier, TT_ForEachMacro}); + Macros.insert({Identifier, {TT_ForEachMacro, /*Finalized=*/false}}); } for (const std::string &IfMacro : Style.IfMacros) { auto Identifier = &IdentTable.get(IfMacro); - Macros.insert({Identifier, TT_IfMacro}); + Macros.insert({Identifier, {TT_IfMacro, /*Finalized=*/false}}); } for (const std::string &AttributeMacro : Style.AttributeMacros) { auto Identifier = &IdentTable.get(AttributeMacro); - Macros.insert({Identifier, TT_AttributeMacro}); + Macros.insert({Identifier, {TT_AttributeMacro, /*Finalized=*/false}}); } for (const std::string &StatementMacro : Style.StatementMacros) { auto Identifier = &IdentTable.get(StatementMacro); - Macros.insert({Identifier, TT_StatementMacro}); + Macros.insert({Identifier, {TT_StatementMacro, /*Finalized=*/false}}); } for (const std::string &TypenameMacro : Style.TypenameMacros) { auto Identifier = &IdentTable.get(TypenameMacro); - Macros.insert({Identifier, TT_TypenameMacro}); + Macros.insert({Identifier, {TT_TypenameMacro, /*Finalized=*/false}}); } for (const std::string &NamespaceMacro : Style.NamespaceMacros) { auto Identifier = &IdentTable.get(NamespaceMacro); - Macros.insert({Identifier, TT_NamespaceMacro}); + Macros.insert({Identifier, {TT_NamespaceMacro, /*Finalized=*/false}}); } for (const std::string &WhitespaceSensitiveMacro : Style.WhitespaceSensitiveMacros) { auto Identifier = &IdentTable.get(WhitespaceSensitiveMacro); - Macros.insert({Identifier, TT_UntouchableMacroFunc}); + Macros.insert({Identifier, {TT_UntouchableMacroFunc, /*Finalized=*/true}}); } for (const std::string &StatementAttributeLikeMacro : Style.StatementAttributeLikeMacros) { auto Identifier = &IdentTable.get(StatementAttributeLikeMacro); - Macros.insert({Identifier, TT_StatementAttributeLikeMacro}); + Macros.insert( + {Identifier, {TT_StatementAttributeLikeMacro, /*Finalized=*/false}}); } } @@ -1027,8 +1028,12 @@ Tokens.back()->Tok.getIdentifierInfo()->getPPKeywordID() == tok::pp_define) && it != Macros.end()) { - FormatTok->setType(it->second); - if (it->second == TT_IfMacro) { + if (it->second.Finalized) { + FormatTok->setFinalizedType(it->second.Type); + } else { + FormatTok->setType(it->second.Type); + } + if (it->second.Type == TT_IfMacro) { // The lexer token currently has type tok::kw_unknown. However, for this // substitution to be treated correctly in the TokenAnnotator, faking // the tok value seems to be needed. Not sure if there's a more elegant
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits