klimek updated this revision to Diff 500121. klimek marked 6 inline comments as done. klimek added a comment.
Address review comments. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D144170/new/ https://reviews.llvm.org/D144170 Files: clang/include/clang/Format/Format.h clang/lib/Format/ContinuationIndenter.cpp clang/lib/Format/Format.cpp clang/lib/Format/FormatToken.h clang/lib/Format/MacroExpander.cpp clang/lib/Format/Macros.h clang/lib/Format/TokenAnalyzer.cpp clang/lib/Format/TokenAnalyzer.h clang/lib/Format/TokenAnnotator.cpp clang/lib/Format/TokenAnnotator.h clang/lib/Format/UnwrappedLineFormatter.cpp clang/lib/Format/UnwrappedLineParser.cpp clang/lib/Format/UnwrappedLineParser.h clang/lib/Format/WhitespaceManager.cpp clang/unittests/Format/FormatTest.cpp clang/unittests/Format/MacroCallReconstructorTest.cpp clang/unittests/Format/MacroExpanderTest.cpp clang/unittests/Format/TestLexer.h
Index: clang/unittests/Format/TestLexer.h =================================================================== --- clang/unittests/Format/TestLexer.h +++ clang/unittests/Format/TestLexer.h @@ -72,7 +72,8 @@ TokenList annotate(llvm::StringRef Code) { FormatTokenLexer Lex = getNewLexer(Code); auto Tokens = Lex.lex(); - UnwrappedLineParser Parser(Style, Lex.getKeywords(), 0, Tokens, *this); + UnwrappedLineParser Parser(SourceMgr.get(), Style, Lex.getKeywords(), 0, + Tokens, *this, Allocator, IdentTable); Parser.parse(); TokenAnnotator Annotator(Style, Lex.getKeywords()); for (auto &Line : UnwrappedLines) { Index: clang/unittests/Format/MacroExpanderTest.cpp =================================================================== --- clang/unittests/Format/MacroExpanderTest.cpp +++ clang/unittests/Format/MacroExpanderTest.cpp @@ -19,9 +19,16 @@ Lex.Allocator, Lex.IdentTable); } + std::string expand(MacroExpander &Macros, llvm::StringRef Name) { + EXPECT_TRUE(Macros.defined(Name)) + << "Macro not defined: \"" << Name << "\""; + return text(Macros.expand(Lex.id(Name), {})); + } + std::string expand(MacroExpander &Macros, llvm::StringRef Name, - const std::vector<std::string> &Args = {}) { - EXPECT_TRUE(Macros.defined(Name)); + const std::vector<std::string> &Args) { + EXPECT_TRUE(Macros.defined(Name)) + << "Macro not defined: \"" << Name << "\""; return text(Macros.expand(Lex.id(Name), lexArgs(Args))); } @@ -95,7 +102,7 @@ EXPECT_EQ("", expand(*Macros, "A")); EXPECT_EQ("b", expand(*Macros, "B")); EXPECT_EQ("c+c", expand(*Macros, "C")); - EXPECT_EQ("", expand(*Macros, "D")); + EXPECT_EQ("", expand(*Macros, "D", {})); } TEST_F(MacroExpanderTest, ExpandsWithArguments) { @@ -105,7 +112,6 @@ }); EXPECT_EQ("", expand(*Macros, "A", {"a"})); EXPECT_EQ("b1+b2+b3", expand(*Macros, "B", {"b1", "b2 + b3"})); - EXPECT_EQ("x+", expand(*Macros, "B", {"x"})); } TEST_F(MacroExpanderTest, AttributizesTokens) { @@ -200,6 +206,14 @@ EXPECT_ATTRIBUTES(Result, Attributes); } +TEST_F(MacroExpanderTest, Overloads) { + auto Macros = create({"A=x", "A()=y", "A(a)=a", "A(a, b)=a b"}); + EXPECT_EQ("x", expand(*Macros, "A")); + EXPECT_EQ("y", expand(*Macros, "A", {})); + EXPECT_EQ("z", expand(*Macros, "A", {"z"})); + EXPECT_EQ("xy", expand(*Macros, "A", {"x", "y"})); +} + } // namespace } // namespace format } // namespace clang Index: clang/unittests/Format/MacroCallReconstructorTest.cpp =================================================================== --- clang/unittests/Format/MacroCallReconstructorTest.cpp +++ clang/unittests/Format/MacroCallReconstructorTest.cpp @@ -33,13 +33,31 @@ TokenList expand(llvm::StringRef Name, const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) { + return expandInternal(Name, Args); + } + + TokenList expand(llvm::StringRef Name) { return expandInternal(Name, {}); } + + TokenList expand(llvm::StringRef Name, const std::vector<std::string> &Args) { + return expandInternal(Name, lexArgs(Args)); + } + + const UnexpandedMap &getUnexpanded() const { return Unexpanded; } + + const TokenList &getTokens() const { return Tokens; } + +private: + TokenList expandInternal( + llvm::StringRef Name, + const std::optional<SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> + &Args) { auto *ID = Lex.id(Name); auto UnexpandedLine = std::make_unique<UnwrappedLine>(); UnexpandedLine->Tokens.push_back(ID); - if (!Args.empty()) { + if (Args && !Args->empty()) { UnexpandedLine->Tokens.push_back(Lex.id("(")); - for (auto I = Args.begin(), E = Args.end(); I != E; ++I) { - if (I != Args.begin()) + for (auto I = Args->begin(), E = Args->end(); I != E; ++I) { + if (I != Args->begin()) UnexpandedLine->Tokens.push_back(Lex.id(",")); UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(), I->end()); @@ -57,16 +75,6 @@ return UnexpandedTokens; } - TokenList expand(llvm::StringRef Name, - const std::vector<std::string> &Args = {}) { - return expand(Name, lexArgs(Args)); - } - - const UnexpandedMap &getUnexpanded() const { return Unexpanded; } - - const TokenList &getTokens() const { return Tokens; } - -private: llvm::SmallVector<TokenList, 1> lexArgs(const std::vector<std::string> &Args) { llvm::SmallVector<TokenList, 1> Result; @@ -563,34 +571,6 @@ EXPECT_THAT(std::move(Unexp).takeResult(), matchesLine(Expected)); } -TEST_F(MacroCallReconstructorTest, UnusedMacroArguments) { - auto Macros = createExpander({"X=x"}); - Expansion Exp(Lex, *Macros); - TokenList Call = Exp.expand("X", {"a", "b", "c"}); - - MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); - Unexp.addLine(line(Exp.getTokens())); - EXPECT_TRUE(Unexp.finished()); - Matcher U(Call, Lex); - EXPECT_THAT(std::move(Unexp).takeResult(), - matchesLine(line(U.consume("X(a, b, c)")))); -} - -TEST_F(MacroCallReconstructorTest, UnusedEmptyMacroArgument) { - auto Macros = createExpander({"X=x"}); - Expansion Exp(Lex, *Macros); - TokenList Call = Exp.expand("X", {std::string("")}); - - MacroCallReconstructor Unexp(0, Exp.getUnexpanded()); - Matcher E(Exp.getTokens(), Lex); - auto Semi = tokens(";"); - Unexp.addLine(line({E.consume("x"), Semi})); - EXPECT_TRUE(Unexp.finished()); - Matcher U(Call, Lex); - EXPECT_THAT(std::move(Unexp).takeResult(), - matchesLine(line({U.consume("X()"), Semi}))); -} - TEST_F(MacroCallReconstructorTest, ChildrenSplitAcrossArguments) { auto Macros = createExpander({"CALL(a, b)=f([]() a b)"}); Expansion Exp(Lex, *Macros); @@ -655,7 +635,7 @@ } TEST_F(MacroCallReconstructorTest, InvalidCodeSplittingBracesAcrossArgs) { - auto Macros = createExpander({"M(a, b)=(a) (b)"}); + auto Macros = createExpander({"M(a, b, c)=(a) (b) c"}); Expansion Exp(Lex, *Macros); TokenList Call = Exp.expand("M", {std::string("{"), "x", ""}); Index: clang/unittests/Format/FormatTest.cpp =================================================================== --- clang/unittests/Format/FormatTest.cpp +++ clang/unittests/Format/FormatTest.cpp @@ -22576,6 +22576,236 @@ "aaaallvm::outs()\n <<"); } +TEST_F(FormatTest, UnexpandConfiguredMacros) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("CLASS=class C {"); + Style.Macros.push_back("SEMI=;"); + Style.Macros.push_back("STMT=f();"); + Style.Macros.push_back("ID(x)=x"); + Style.Macros.push_back("ID3(x, y, z)=x y z"); + Style.Macros.push_back("CALL(x)=f([] { x })"); + Style.Macros.push_back("ASSIGN_OR_RETURN(a, b)=a = (b)"); + Style.Macros.push_back("ASSIGN_OR_RETURN(a, b, c)=a = (b); if (x) return c"); + Style.Macros.push_back("MOCK_METHOD(r, n, a, s)=r n a s"); + + verifyFormat("ID(nested(a(b, c), d))", Style); + verifyFormat("CLASS\n" + " a *b;\n" + "};", + Style); + verifyFormat("SEMI\n" + "SEMI\n" + "SEMI", + Style); + verifyFormat("STMT\n" + "STMT\n" + "STMT", + Style); + verifyFormat("void f() { ID(a *b); }", Style); + verifyFormat(R"(ID( + { ID(a *b); }); +)", + Style); + verifyIncompleteFormat(R"(ID3({, ID(a *b), + ; + }); +)", + Style); + + verifyFormat("ID(CALL(CALL(return a * b;)));", Style); + + verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n" + " MySomewhatLongFunction(SomethingElse()));\n", + Style); + verifyFormat("ASSIGN_OR_RETURN(MySomewhatLongType *variable,\n" + " MySomewhatLongFunction(SomethingElse()), " + "ReturnMe());\n", + Style); + + verifyFormat(R"( +#define MACRO(a, b) ID(a + b) +)", + Style); + EXPECT_EQ(R"( +int a; +int b; +int c; +int d; +int e; +int f; +ID( + namespace foo { + int a; + } +) // namespace k +)", + format(R"( +int a; +int b; +int c; +int d; +int e; +int f; +ID(namespace foo { int a; }) // namespace k +)", + Style)); + verifyFormat(R"(ID( + // + ({ ; })) +)", + Style); + + Style.ColumnLimit = 35; + // FIXME: Arbitrary formatting of macros where the end of the logical + // line is in the middle of a macro call are not working yet. + verifyFormat(R"(ID( + void f(); + void) +ID(g) ID(()) ID( + ; + void g();) +)", + Style); + + Style.ColumnLimit = 10; + verifyFormat("STMT\n" + "STMT\n" + "STMT", + Style); + + EXPECT_EQ(R"( +ID(CALL(CALL( + a *b))); +)", + format(R"( +ID(CALL(CALL(a * b))); +)", + Style)); + + // FIXME: If we want to support unbalanced braces or parens from macro + // expansions we need to re-think how we propagate errors in + // TokenAnnotator::parseLine; for investigation, switching the inner loop of + // TokenAnnotator::parseLine to return LT_Other instead of LT_Invalid in case + // of !consumeToken() changes the formatting of the test below and makes it + // believe it has a fully correct formatting. + EXPECT_EQ(R"( +ID3( + { + CLASS + a *b; + }; + }, + ID(x *y); + , + STMT + STMT + STMT) +void f(); +)", + format(R"( +ID3({CLASS a*b; };}, ID(x*y);, STMT STMT STMT) +void f(); +)", + Style)); + + verifyFormat("ID(a(\n" + "#ifdef A\n" + " b, c\n" + "#else\n" + " d(e)\n" + "#endif\n" + " ))", + Style); + Style.ColumnLimit = 80; + verifyFormat(R"(ASSIGN_OR_RETURN( + // Comment + a b, c); +)", + Style); + Style.ColumnLimit = 30; + verifyFormat(R"(ASSIGN_OR_RETURN( + // Comment + // + a b, + xxxxxxxxxxxx( + yyyyyyyyyyyyyyyyy, + zzzzzzzzzzzzzzzzzz), + f([]() { + a(); + b(); + })); +)", + Style); + verifyFormat(R"(int a = []() { + ID( + x; + y; + z;) + ; +}(); +)", + Style); + EXPECT_EQ( + R"(ASSIGN_OR_RETURN(( +==== +#)) +})", + format(R"(ASSIGN_OR_RETURN(( +==== +#)) +})", + Style, SC_ExpectIncomplete)); + EXPECT_EQ(R"(ASSIGN_OR_RETURN( +} +( +==== +#), + a))", + format(R"(ASSIGN_OR_RETURN( +} +( +==== +#), +a))", + Style, SC_ExpectIncomplete)); + EXPECT_EQ(R"(ASSIGN_OR_RETURN(a +// +==== +# + <))", + format(R"(ASSIGN_OR_RETURN(a +// +==== +# + <))", + Style)); + verifyFormat("class C {\n" + " MOCK_METHOD(R, f,\n" + " (a *b, c *d),\n" + " (override));\n" + "};", + Style); +} + +TEST_F(FormatTest, KeepParensWhenExpandingObjectLikeMacros) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("FN=class C { int f"); + verifyFormat("void f() {\n" + " FN(a *b);\n" + " };\n" + "}", + Style); +} + +TEST_F(FormatTest, DoesNotExpandFunctionLikeMacrosWithoutParens) { + FormatStyle Style = getLLVMStyle(); + Style.Macros.push_back("CLASS()=class C {"); + verifyFormat("CLASS void f();\n" + "}\n" + ";", + Style); +} + TEST_F(FormatTest, HandleUnbalancedImplicitBracesAcrossPPBranches) { std::string code = "#if A\n" "#if B\n" Index: clang/lib/Format/WhitespaceManager.cpp =================================================================== --- clang/lib/Format/WhitespaceManager.cpp +++ clang/lib/Format/WhitespaceManager.cpp @@ -49,7 +49,7 @@ unsigned Spaces, unsigned StartOfTokenColumn, bool IsAligned, bool InPPDirective) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; Tok.setDecision((Newlines > 0) ? FD_Break : FD_Continue); Changes.push_back(Change(Tok, /*CreateReplacement=*/true, Tok.WhitespaceRange, @@ -60,7 +60,7 @@ void WhitespaceManager::addUntouchableToken(const FormatToken &Tok, bool InPPDirective) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; Changes.push_back(Change(Tok, /*CreateReplacement=*/false, Tok.WhitespaceRange, /*Spaces=*/0, @@ -84,7 +84,7 @@ const FormatToken &Tok, unsigned Offset, unsigned ReplaceChars, StringRef PreviousPostfix, StringRef CurrentPrefix, bool InPPDirective, unsigned Newlines, int Spaces) { - if (Tok.Finalized) + if (Tok.Finalized || (Tok.MacroCtx && Tok.MacroCtx->Role == MR_ExpandedArg)) return; SourceLocation Start = Tok.getStartOfNonWhitespace().getLocWithOffset(Offset); Changes.push_back( Index: clang/lib/Format/UnwrappedLineParser.h =================================================================== --- clang/lib/Format/UnwrappedLineParser.h +++ clang/lib/Format/UnwrappedLineParser.h @@ -15,10 +15,14 @@ #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H +#include "Encoding.h" #include "FormatToken.h" +#include "Macros.h" #include "clang/Basic/IdentifierTable.h" #include "clang/Format/Format.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/Support/Regex.h" #include <list> #include <stack> @@ -76,6 +80,19 @@ unsigned FirstStartColumn = 0; }; +/// Interface for users of the UnwrappedLineParser to receive the parsed lines. +/// Parsing a single snippet of code can lead to multiple runs, where each +/// run is a coherent view of the file. +/// +/// For example, different runs are generated: +/// - for different combinations of #if blocks +/// - when macros are involved, for the expanded code and the as-written code +/// +/// Some tokens will only be visible in a subset of the runs. +/// For each run, \c UnwrappedLineParser will call \c consumeUnwrappedLine +/// for each parsed unwrapped line, and then \c finishRun to indicate +/// that the set of unwrapped lines before is one coherent view of the +/// code snippet to be formatted. class UnwrappedLineConsumer { public: virtual ~UnwrappedLineConsumer() {} @@ -87,10 +104,12 @@ class UnwrappedLineParser { public: - UnwrappedLineParser(const FormatStyle &Style, + UnwrappedLineParser(SourceManager &SourceMgr, const FormatStyle &Style, const AdditionalKeywords &Keywords, unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, - UnwrappedLineConsumer &Callback); + UnwrappedLineConsumer &Callback, + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, + IdentifierTable &IdentTable); void parse(); @@ -193,6 +212,8 @@ unsigned parseVerilogHierarchyHeader(); void parseVerilogTable(); void parseVerilogCaseLabel(); + std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> + parseMacroCall(); // Used by addUnwrappedLine to denote whether to keep or remove a level // when resetting the line state. @@ -236,16 +257,49 @@ bool isOnNewLine(const FormatToken &FormatTok); + // Returns whether there is a macro expansion in the line, i.e. a token that + // was expanded from a macro call. + bool containsExpansion(const UnwrappedLine &Line) const; + // Compute hash of the current preprocessor branch. // This is used to identify the different branches, and thus track if block // open and close in the same branch. size_t computePPHash() const; + bool parsingPPDirective() const { return CurrentLines != &Lines; } + // FIXME: We are constantly running into bugs where Line.Level is incorrectly // subtracted from beyond 0. Introduce a method to subtract from Line.Level // and use that everywhere in the Parser. std::unique_ptr<UnwrappedLine> Line; + // Lines that are created by macro expansion. + // When formatting code containing macro calls, we first format the expanded + // lines to set the token types correctly. Afterwards, we format the + // reconstructed macro calls, re-using the token types determined in the first + // step. + // ExpandedLines will be reset every time we create a new LineAndExpansion + // instance once a line containing macro calls has been parsed. + SmallVector<UnwrappedLine, 8> CurrentExpandedLines; + + // Maps from the first token of a top-level UnwrappedLine that contains + // a macro call to the replacement UnwrappedLines expanded from the macro + // call. + llvm::DenseMap<FormatToken *, SmallVector<UnwrappedLine, 8>> ExpandedLines; + + // Map from the macro identifier to a line containing the full unexpanded + // macro call. + llvm::DenseMap<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded; + + // For recursive macro expansions, trigger reconstruction only on the + // outermost expansion. + bool InExpansion = false; + + // Set while we reconstruct a macro call. + // For reconstruction, we feed the expanded lines into the reconstructor + // until it is finished. + std::optional<MacroCallReconstructor> Reconstruct; + // Comments are sorted into unwrapped lines by whether they are in the same // line as the previous token, or not. If not, they belong to the next token. // Since the next token might already be in a new unwrapped line, we need to @@ -345,13 +399,17 @@ // does not start at the beginning of the file. unsigned FirstStartColumn; + MacroExpander Macros; + friend class ScopedLineState; friend class CompoundStatementIndenter; }; struct UnwrappedLineNode { UnwrappedLineNode() : Tok(nullptr) {} - UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} + UnwrappedLineNode(FormatToken *Tok, + llvm::ArrayRef<UnwrappedLine> Children = {}) + : Tok(Tok), Children(Children.begin(), Children.end()) {} FormatToken *Tok; SmallVector<UnwrappedLine, 0> Children; Index: clang/lib/Format/UnwrappedLineParser.cpp =================================================================== --- clang/lib/Format/UnwrappedLineParser.cpp +++ clang/lib/Format/UnwrappedLineParser.cpp @@ -14,11 +14,15 @@ #include "UnwrappedLineParser.h" #include "FormatToken.h" +#include "FormatTokenLexer.h" #include "FormatTokenSource.h" +#include "Macros.h" #include "TokenAnnotator.h" #include "clang/Basic/TokenKinds.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_os_ostream.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> @@ -143,11 +147,12 @@ unsigned OldLineLevel; }; -UnwrappedLineParser::UnwrappedLineParser(const FormatStyle &Style, - const AdditionalKeywords &Keywords, - unsigned FirstStartColumn, - ArrayRef<FormatToken *> Tokens, - UnwrappedLineConsumer &Callback) +UnwrappedLineParser::UnwrappedLineParser( + SourceManager &SourceMgr, const FormatStyle &Style, + const AdditionalKeywords &Keywords, unsigned FirstStartColumn, + ArrayRef<FormatToken *> Tokens, UnwrappedLineConsumer &Callback, + llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator, + IdentifierTable &IdentTable) : Line(new UnwrappedLine), MustBreakBeforeNextToken(false), CurrentLines(&Lines), Style(Style), Keywords(Keywords), CommentPragmasRegex(Style.CommentPragmas), Tokens(nullptr), @@ -155,7 +160,8 @@ IncludeGuard(Style.IndentPPDirectives == FormatStyle::PPDIS_None ? IG_Rejected : IG_Inited), - IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn) {} + IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), + Macros(Style.Macros, SourceMgr, Style, Allocator, IdentTable) {} void UnwrappedLineParser::reset() { PPBranchLevel = -1; @@ -173,6 +179,15 @@ NestedTooDeep.clear(); PPStack.clear(); Line->FirstStartColumn = FirstStartColumn; + + if (!Unexpanded.empty()) + for (FormatToken *Token : AllTokens) + Token->MacroCtx.reset(); + CurrentExpandedLines.clear(); + ExpandedLines.clear(); + Unexpanded.clear(); + InExpansion = false; + Reconstruct.reset(); } void UnwrappedLineParser::parse() { @@ -196,12 +211,36 @@ } // Create line with eof token. + assert(FormatTok->is(tok::eof)); pushToken(FormatTok); addUnwrappedLine(); - for (const UnwrappedLine &Line : Lines) - Callback.consumeUnwrappedLine(Line); + // In a first run, format everything with the lines containing macro calls + // replaced by the expansion. + if (!ExpandedLines.empty()) { + LLVM_DEBUG(llvm::dbgs() << "Expanded lines:\n"); + for (const auto &Line : Lines) { + if (!Line.Tokens.empty()) { + auto it = ExpandedLines.find(Line.Tokens.begin()->Tok); + if (it != ExpandedLines.end()) { + for (const auto &Expanded : it->second) { + LLVM_DEBUG(printDebugInfo(Expanded)); + Callback.consumeUnwrappedLine(Expanded); + } + continue; + } + } + LLVM_DEBUG(printDebugInfo(Line)); + Callback.consumeUnwrappedLine(Line); + } + Callback.finishRun(); + } + LLVM_DEBUG(llvm::dbgs() << "Unwrapped lines:\n"); + for (const UnwrappedLine &Line : Lines) { + LLVM_DEBUG(printDebugInfo(Line)); + Callback.consumeUnwrappedLine(Line); + } Callback.finishRun(); Lines.clear(); while (!PPLevelBranchIndex.empty() && @@ -724,7 +763,7 @@ parseParens(); size_t NbPreprocessorDirectives = - CurrentLines == &Lines ? PreprocessorDirectives.size() : 0; + !parsingPPDirective() ? PreprocessorDirectives.size() : 0; addUnwrappedLine(); size_t OpeningLineIndex = CurrentLines->empty() @@ -4152,12 +4191,25 @@ Line->Level = OrigLevel; } +bool UnwrappedLineParser::containsExpansion(const UnwrappedLine &Line) const { + for (const auto &N : Line.Tokens) { + if (N.Tok->MacroCtx) + return true; + for (const UnwrappedLine &Child : N.Children) + if (containsExpansion(Child)) + return true; + } + return false; +} + void UnwrappedLineParser::addUnwrappedLine(LineLevel AdjustLevel) { if (Line->Tokens.empty()) return; LLVM_DEBUG({ - if (CurrentLines == &Lines) + if (!parsingPPDirective()) { + llvm::dbgs() << "Adding unwrapped line:\n"; printDebugInfo(*Line); + } }); // If this line closes a block when in Whitesmiths mode, remember that @@ -4168,7 +4220,39 @@ Line->MatchingOpeningBlockLineIndex != UnwrappedLine::kInvalidIndex && Style.BreakBeforeBraces == FormatStyle::BS_Whitesmiths; - CurrentLines->push_back(std::move(*Line)); + // If the current line was expanded from a macro call, we use it to + // reconstruct an unwrapped line from the structure of the expanded unwrapped + // line and the unexpanded token stream. + if (!parsingPPDirective() && !InExpansion && containsExpansion(*Line)) { + if (!Reconstruct) + Reconstruct.emplace(Line->Level, Unexpanded); + Reconstruct->addLine(*Line); + + // While the reconstructed unexpanded lines are stored in the normal + // flow of lines, the expanded lines are stored on the side to be analyzed + // in an extra step. + CurrentExpandedLines.push_back(std::move(*Line)); + + if (Reconstruct->finished()) { + UnwrappedLine Reconstructed = std::move(*Reconstruct).takeResult(); + assert(!Reconstructed.Tokens.empty() && + "Reconstructed must at least contain the macro identifier."); + assert(!parsingPPDirective()); + LLVM_DEBUG({ + llvm::dbgs() << "Adding unexpanded line:\n"; + printDebugInfo(Reconstructed); + }); + ExpandedLines[Reconstructed.Tokens.begin()->Tok] = CurrentExpandedLines; + Lines.push_back(std::move(Reconstructed)); + CurrentExpandedLines.clear(); + Reconstruct.reset(); + } + } else { + // At the top level we only get here when no unexpansion is going on, or + // when conditional formatting led to unfinished macro reconstructions. + assert(!Reconstruct || (CurrentLines != &Lines) || PPStack.size() > 0); + CurrentLines->push_back(std::move(*Line)); + } Line->Tokens.clear(); Line->MatchingOpeningBlockLineIndex = UnwrappedLine::kInvalidIndex; Line->FirstStartColumn = 0; @@ -4176,7 +4260,7 @@ if (ClosesWhitesmithsBlock && AdjustLevel == LineLevel::Remove) --Line->Level; - if (CurrentLines == &Lines && !PreprocessorDirectives.empty()) { + if (!parsingPPDirective() && !PreprocessorDirectives.empty()) { CurrentLines->append( std::make_move_iterator(PreprocessorDirectives.begin()), std::make_move_iterator(PreprocessorDirectives.end())); @@ -4470,6 +4554,75 @@ continue; } + if (FormatTok->is(tok::identifier) && + Macros.defined(FormatTok->TokenText) && + // FIXME: Allow expanding macros in preprocessor directives. + !Line->InPPDirective) { + FormatToken *ID = FormatTok; + unsigned Position = Tokens->getPosition(); + + // To correctly parse the code, we need to replace the tokens of the macro + // call with its expansion. + auto PreCall = std::move(Line); + Line.reset(new UnwrappedLine); + bool OldInExpansion = InExpansion; + InExpansion = true; + // We parse the macro call into a new line. + auto Args = parseMacroCall(); + InExpansion = OldInExpansion; + assert(Line->Tokens.front().Tok == ID); + // And remember the unexpanded macro call tokens. + auto UnexpandedLine = std::move(Line); + // Reset to the old line. + Line = std::move(PreCall); + + LLVM_DEBUG({ + llvm::dbgs() << "Call: " << ID->TokenText << "("; + if (Args) { + llvm::dbgs() << "("; + for (const auto &Arg : Args.value()) + for (const auto &T : Arg) + llvm::dbgs() << T->TokenText << " "; + llvm::dbgs() << ")"; + } + llvm::dbgs() << "\n"; + }); + if (Macros.objectLike(ID->TokenText) && Args && + !Macros.hasArity(ID->TokenText, Args->size())) { + // The macro is overloaded to be both object-like and function-like, + // but none of the function-like arities match the number of arguments. + // Thus, expand as object-like macro. + LLVM_DEBUG(llvm::dbgs() + << "Macro \"" << ID->TokenText + << "\" not overloaded for arity " << Args->size() + << ", using object-like overload."); + Args.reset(); + UnexpandedLine->Tokens.resize(1); + Tokens->setPosition(Position); + nextToken(); + assert(!Args && Macros.objectLike(ID->TokenText)); + } + if ((!Args && Macros.objectLike(ID->TokenText)) || + (Args && Macros.hasArity(ID->TokenText, Args->size()))) { + // Next, we insert the expanded tokens in the token stream at the + // current position, and continue parsing. + Unexpanded[ID] = std::move(UnexpandedLine); + SmallVector<FormatToken *, 8> New = Macros.expand(ID, Args); + if (!New.empty()) + FormatTok = Tokens->insertTokens(New); + + LLVM_DEBUG({ + llvm::dbgs() << "Expanded: "; + for (const auto &T : New) + llvm::dbgs() << T->TokenText << " "; + llvm::dbgs() << "\n"; + }); + } else { + Tokens->setPosition(Position); + FormatTok = ID; + } + } + if (!FormatTok->is(tok::comment)) { distributeComments(Comments, FormatTok); Comments.clear(); @@ -4483,6 +4636,66 @@ Comments.clear(); } +namespace { +template <typename Iterator> +void pushTokens(Iterator Begin, Iterator End, + llvm::SmallVectorImpl<FormatToken *> &Into) { + for (auto I = Begin; I != End; ++I) { + Into.push_back(I->Tok); + for (const auto &Child : I->Children) + pushTokens(Child.Tokens.begin(), Child.Tokens.end(), Into); + } +} +} // namespace + +std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> +UnwrappedLineParser::parseMacroCall() { + std::optional<llvm::SmallVector<llvm::SmallVector<FormatToken *, 8>, 1>> Args; + assert(Line->Tokens.empty()); + nextToken(); + if (!FormatTok->is(tok::l_paren)) + return Args; + nextToken(); + Args.emplace(); + auto ArgStart = std::prev(Line->Tokens.end()); + + int Parens = 0; + do { + switch (FormatTok->Tok.getKind()) { + case tok::l_paren: + ++Parens; + nextToken(); + break; + case tok::r_paren: { + if (Parens > 0) { + --Parens; + nextToken(); + break; + } + Args->push_back({}); + pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); + nextToken(); + return Args; + } + case tok::comma: { + if (Parens > 0) { + nextToken(); + break; + } + Args->push_back({}); + pushTokens(std::next(ArgStart), Line->Tokens.end(), Args->back()); + nextToken(); + ArgStart = std::prev(Line->Tokens.end()); + break; + } + default: + nextToken(); + break; + } + } while (!eof()); + return {}; +} + void UnwrappedLineParser::pushToken(FormatToken *Tok) { Line->Tokens.push_back(UnwrappedLineNode(Tok)); if (MustBreakBeforeNextToken) { Index: clang/lib/Format/UnwrappedLineFormatter.cpp =================================================================== --- clang/lib/Format/UnwrappedLineFormatter.cpp +++ clang/lib/Format/UnwrappedLineFormatter.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "UnwrappedLineFormatter.h" +#include "FormatToken.h" #include "NamespaceEndCommentsFixer.h" #include "WhitespaceManager.h" #include "llvm/Support/Debug.h" @@ -918,9 +919,22 @@ static void markFinalized(FormatToken *Tok) { for (; Tok; Tok = Tok->Next) { - Tok->Finalized = true; - for (AnnotatedLine *Child : Tok->Children) - markFinalized(Child->First); + if (Tok->MacroCtx && Tok->MacroCtx->Role == MR_ExpandedArg) { + // In the first pass we format all macro arguments in the expanded token + // stream. Instead of finalizing the macro arguments, we mark that they + // will be modified as unexpanded arguments (as part of the macro call + // formatting) in the next pass. + Tok->MacroCtx->Role = MR_UnexpandedArg; + // Reset whether spaces are required before this token, as that is context + // dependent, and that context may change when formatting the macro call. + // For example, given M(x) -> 2 * x, and the macro call M(var), + // the token 'var' will have SpacesRequiredBefore = 1 after being + // formatted as part of the expanded macro, but SpacesRequiredBefore = 0 + // for its position within the macro call. + Tok->SpacesRequiredBefore = 0; + } else { + Tok->Finalized = true; + } } } @@ -975,15 +989,15 @@ bool formatChildren(LineState &State, bool NewLine, bool DryRun, unsigned &Penalty) { const FormatToken *LBrace = State.NextToken->getPreviousNonComment(); + bool HasLBrace = LBrace && LBrace->is(tok::l_brace) && LBrace->is(BK_Block); FormatToken &Previous = *State.NextToken->Previous; - if (!LBrace || LBrace->isNot(tok::l_brace) || LBrace->isNot(BK_Block) || - Previous.Children.size() == 0) { + if (Previous.Children.size() == 0 || (!HasLBrace && !LBrace->MacroParent)) { // The previous token does not open a block. Nothing to do. We don't // assert so that we can simply call this function for all tokens. return true; } - if (NewLine) { + if (NewLine || Previous.MacroParent) { const ParenState &P = State.Stack.back(); int AdditionalIndent = @@ -1349,11 +1363,12 @@ NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker); unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine); bool FitsIntoOneLine = - TheLine.Last->TotalLength + Indent <= ColumnLimit || - (TheLine.Type == LT_ImportStatement && - (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || - (Style.isCSharp() && - TheLine.InPPDirective); // don't split #regions in C# + !TheLine.ContainsMacroCall && + (TheLine.Last->TotalLength + Indent <= ColumnLimit || + (TheLine.Type == LT_ImportStatement && + (!Style.isJavaScript() || !Style.JavaScriptWrapImports)) || + (Style.isCSharp() && + TheLine.InPPDirective)); // don't split #regions in C# if (Style.ColumnLimit == 0) { NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this) .formatLine(TheLine, NextStartColumn + Indent, Index: clang/lib/Format/TokenAnnotator.h =================================================================== --- clang/lib/Format/TokenAnnotator.h +++ clang/lib/Format/TokenAnnotator.h @@ -65,20 +65,32 @@ // left them in a different state. First->Previous = nullptr; FormatToken *Current = First; + addChildren(Line.Tokens.front(), Current); for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { + if (Node.Tok->MacroParent) + ContainsMacroCall = true; Current->Next = Node.Tok; Node.Tok->Previous = Current; Current = Current->Next; - Current->Children.clear(); - for (const auto &Child : Node.Children) { - Children.push_back(new AnnotatedLine(Child)); - Current->Children.push_back(Children.back()); - } + addChildren(Node, Current); + // FIXME: if we add children, previous will point to the token before + // the children; changing this requires significant changes across + // clang-format. } Last = Current; Last->Next = nullptr; } + void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { + Current->Children.clear(); + for (const auto &Child : Node.Children) { + Children.push_back(new AnnotatedLine(Child)); + if (Children.back()->ContainsMacroCall) + ContainsMacroCall = true; + Current->Children.push_back(Children.back()); + } + } + ~AnnotatedLine() { for (AnnotatedLine *Child : Children) delete Child; @@ -149,6 +161,9 @@ bool MightBeFunctionDecl; bool IsMultiVariableDeclStmt; + /// \c True if this line contains a macro call for which an expansion exists. + bool ContainsMacroCall = false; + /// \c True if this line should be formatted, i.e. intersects directly or /// indirectly with one of the input ranges. bool Affected; Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -2614,6 +2614,13 @@ // Consume operators with higher precedence. parse(Precedence + 1); + // Do not assign fake parenthesis to tokens that are part of an + // unexpanded macro call. The line within the macro call contains + // the parenthesis and commas, and we will not find operators within + // that structure. + if (Current && Current->MacroParent) + break; + int CurrentPrecedence = getCurrentPrecedence(); if (Precedence == CurrentPrecedence && Current && @@ -4389,8 +4396,12 @@ Left.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow)) { return true; } - if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen)) + if (Left.is(tok::comma) && !Right.is(TT_OverloadedOperatorLParen) && + // In an unexpanded macro call we only find the parentheses and commas + // in a line; the commas and closing parenthesis do not require a space. + (Left.Children.empty() || !Left.MacroParent)) { return true; + } if (Right.is(tok::comma)) return false; if (Right.is(TT_ObjCBlockLParen)) Index: clang/lib/Format/TokenAnalyzer.h =================================================================== --- clang/lib/Format/TokenAnalyzer.h +++ clang/lib/Format/TokenAnalyzer.h @@ -46,7 +46,7 @@ FileID getFileID() const { return ID; } - const SourceManager &getSourceManager() const { return SM; } + SourceManager &getSourceManager() const { return SM; } ArrayRef<CharSourceRange> getCharRanges() const { return CharRanges; } Index: clang/lib/Format/TokenAnalyzer.cpp =================================================================== --- clang/lib/Format/TokenAnalyzer.cpp +++ clang/lib/Format/TokenAnalyzer.cpp @@ -104,12 +104,12 @@ IdentifierTable IdentTable(getFormattingLangOpts(Style)); FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(), Env.getFirstStartColumn(), Style, Encoding, Allocator, - IdentTable); ArrayRef<FormatToken *> Toks(Lex.lex()); SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end()); - UnwrappedLineParser Parser(Style, Lex.getKeywords(), - Env.getFirstStartColumn(), Tokens, *this); + UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(), + Env.getFirstStartColumn(), Tokens, *this, + Allocator, IdentTable); Parser.parse(); assert(UnwrappedLines.back().empty()); unsigned Penalty = 0; Index: clang/lib/Format/Macros.h =================================================================== --- clang/lib/Format/Macros.h +++ clang/lib/Format/Macros.h @@ -106,17 +106,23 @@ IdentifierTable &IdentTable); ~MacroExpander(); - /// Returns whether a macro \p Name is defined. + /// Returns whether any macro \p Name is defined, regardless of overloads. bool defined(llvm::StringRef Name) const; - /// Returns whether the macro has no arguments and should not consume - /// subsequent parentheses. + /// Returns whetherh there is an object-like overload, i.e. where the macro + /// has no arguments and should not consume subsequent parentheses. bool objectLike(llvm::StringRef Name) const; + /// Returns whether macro \p Name provides an overload with the given arity. + bool hasArity(llvm::StringRef Name, unsigned Arity) const; + /// Returns the expanded stream of format tokens for \p ID, where /// each element in \p Args is a positional argument to the macro call. - llvm::SmallVector<FormatToken *, 8> expand(FormatToken *ID, - ArgsList Args) const; + /// If \p Args is not set, the object-like overload is used. + /// If \p Args is set, the overload with the arity equal to \c Args.size() is + /// used. + llvm::SmallVector<FormatToken *, 8> + expand(FormatToken *ID, std::optional<ArgsList> OptionalArgs) const; private: struct Definition; @@ -129,7 +135,8 @@ llvm::SpecificBumpPtrAllocator<FormatToken> &Allocator; IdentifierTable &IdentTable; SmallVector<std::unique_ptr<llvm::MemoryBuffer>> Buffers; - llvm::StringMap<Definition> Definitions; + llvm::StringMap<llvm::DenseMap<int, Definition>> FunctionLike; + llvm::StringMap<Definition> ObjectLike; }; /// Converts a sequence of UnwrappedLines containing expanded macros into a @@ -149,7 +156,7 @@ /// /// After this point, the state of the spelled/expanded stream is "in sync" /// (both at the start of an UnwrappedLine, with no macros open), so the -/// Unexpander can be thrown away and parsing can continue. +/// Reconstructor can be thrown away and parsing can continue. /// /// Given a mapping from the macro name identifier token in the macro call /// to the tokens of the macro call, for example: Index: clang/lib/Format/MacroExpander.cpp =================================================================== --- clang/lib/Format/MacroExpander.cpp +++ clang/lib/Format/MacroExpander.cpp @@ -141,24 +141,44 @@ if (!Tokens.empty()) { DefinitionParser Parser(Tokens); auto Definition = Parser.parse(); - Definitions[Definition.Name] = std::move(Definition); + if (Definition.ObjectLike) { + ObjectLike[Definition.Name] = std::move(Definition); + } else { + FunctionLike[Definition.Name][Definition.Params.size()] = + std::move(Definition); + } } } bool MacroExpander::defined(llvm::StringRef Name) const { - return Definitions.find(Name) != Definitions.end(); + return FunctionLike.find(Name) != FunctionLike.end() || + ObjectLike.find(Name) != ObjectLike.end(); } bool MacroExpander::objectLike(llvm::StringRef Name) const { - return Definitions.find(Name)->second.ObjectLike; + return ObjectLike.find(Name) != ObjectLike.end(); +} + +bool MacroExpander::hasArity(llvm::StringRef Name, unsigned Arity) const { + auto it = FunctionLike.find(Name); + return it != FunctionLike.end() && + (it->second.find(Arity) != it->second.end()); } -llvm::SmallVector<FormatToken *, 8> MacroExpander::expand(FormatToken *ID, - ArgsList Args) const { +llvm::SmallVector<FormatToken *, 8> +MacroExpander::expand(FormatToken *ID, + std::optional<ArgsList> OptionalArgs) const { assert(defined(ID->TokenText)); + assert( + (!OptionalArgs && ObjectLike.find(ID->TokenText) != ObjectLike.end()) || + (OptionalArgs && FunctionLike.find(ID->TokenText) != FunctionLike.end())); + const Definition &Def = OptionalArgs + ? FunctionLike.find(ID->TokenText) + ->second.find(OptionalArgs.value().size()) + ->second + : ObjectLike.find(ID->TokenText)->second; + ArgsList Args = OptionalArgs ? OptionalArgs.value() : ArgsList(); SmallVector<FormatToken *, 8> Result; - const Definition &Def = Definitions.find(ID->TokenText)->second; - // Expand each argument at most once. llvm::StringSet<> ExpandedArgs; Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -377,6 +377,11 @@ /// binary operator. TokenType getType() const { return Type; } void setType(TokenType T) { + // If this token is a macro argument while formatting an unexpanded macro + // call, we do not change its type any more - the type was deduced from + // formatting the expanded macro stream already. + if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) + return; assert((!TypeIsFinalized || T == Type) && "Please use overwriteFixedType to change a fixed type."); Type = T; Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -1036,6 +1036,7 @@ IO.mapOptional("UseTab", Style.UseTab); IO.mapOptional("WhitespaceSensitiveMacros", Style.WhitespaceSensitiveMacros); + IO.mapOptional("Macros", Style.Macros); // If AlwaysBreakAfterDefinitionReturnType was specified but // AlwaysBreakAfterReturnType was not, initialize the latter from the Index: clang/lib/Format/ContinuationIndenter.cpp =================================================================== --- clang/lib/Format/ContinuationIndenter.cpp +++ clang/lib/Format/ContinuationIndenter.cpp @@ -18,6 +18,7 @@ #include "WhitespaceManager.h" #include "clang/Basic/OperatorPrecedence.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/TokenKinds.h" #include "clang/Format/Format.h" #include "llvm/ADT/StringSet.h" #include "llvm/Support/Debug.h" @@ -739,9 +740,14 @@ if (Previous.is(TT_TemplateString) && Previous.opensScope()) CurrentState.NoLineBreak = true; + // Align following lines within parentheses / brackets if configured. + // Note: This doesn't apply to macro expansion lines, which are MACRO( , , ) + // with args as children of the '(' and ',' tokens. It does not make sense to + // align the commas with the opening paren. if (Style.AlignAfterOpenBracket != FormatStyle::BAS_DontAlign && !CurrentState.IsCSharpGenericTypeConstraint && Previous.opensScope() && Previous.isNot(TT_ObjCMethodExpr) && Previous.isNot(TT_RequiresClause) && + !(Current.MacroParent && Previous.MacroParent) && (Current.isNot(TT_LineComment) || Previous.is(BK_BracedInit))) { CurrentState.Indent = State.Column + Spaces; CurrentState.IsAligned = true; Index: clang/include/clang/Format/Format.h =================================================================== --- clang/include/clang/Format/Format.h +++ clang/include/clang/Format/Format.h @@ -2745,6 +2745,39 @@ /// \version 3.7 std::string MacroBlockEnd; + /// A list of macros of the form \c <definition>=<expansion> . + /// + /// Code will be parsed with macros expanded, in order to determine how to + /// interpret and format the macro arguments. + /// + /// For example, the code: + /// \code + /// A(a*b); + /// \endcode + /// will usually be interpreted as a call to a function A, and the + /// multiplication expression will be formatted as `a * b`. + /// + /// If we specify the macro definition: + /// \code + /// Macros: + /// - A(x)=x + /// \endcode + /// the code will now be parsed as a declaration of the variable b of type a*, + /// and formatted as `a* b` (depending on pointer-binding rules). + /// + /// Features and restrictions: + /// * Both function-like macros and object-like macros are supported. + /// * Macro arguments must be used exactly once in the expansion. + /// * No recursive expansion; macros referencing other macros will be + /// ignored. + /// * Overloading by arity is supported: for example, given the macro + /// definitions A=x, A()=y, A(a)=a, + /// 'A;' -> 'x;' + /// 'A();' -> 'y;' + /// 'A(z);' -> 'z;' + /// 'A(a, b) will not be expanded. + std::vector<std::string> Macros; + /// The maximum number of consecutive empty lines to keep. /// \code /// MaxEmptyLinesToKeep: 1 vs. MaxEmptyLinesToKeep: 0 @@ -4306,7 +4339,8 @@ StatementAttributeLikeMacros == R.StatementAttributeLikeMacros && StatementMacros == R.StatementMacros && TabWidth == R.TabWidth && TypenameMacros == R.TypenameMacros && UseTab == R.UseTab && - WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros; + WhitespaceSensitiveMacros == R.WhitespaceSensitiveMacros && + Macros == R.Macros; } std::optional<FormatStyle> GetLanguageStyle(LanguageKind Language) const;
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits