klimek created this revision.
klimek added a reviewer: sammccall.
Herald added subscribers: cfe-commits, mgorny.
Herald added a project: clang.
klimek requested review of this revision.

MacroUnexpander applies the structural formatting of expanded lines into
UnwrappedLines to the corresponding unexpanded macro calls, resulting in
UnwrappedLines for the macro calls the user typed.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88299

Files:
  clang/lib/Format/CMakeLists.txt
  clang/lib/Format/FormatToken.h
  clang/lib/Format/MacroUnexpander.cpp
  clang/lib/Format/Macros.h
  clang/unittests/Format/CMakeLists.txt
  clang/unittests/Format/MacroUnexpanderTest.cpp

Index: clang/unittests/Format/MacroUnexpanderTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Format/MacroUnexpanderTest.cpp
@@ -0,0 +1,636 @@
+#include "../../lib/Format/Macros.h"
+#include "../../lib/Format/UnwrappedLineParser.h"
+#include "TestLexer.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+#include <map>
+#include <memory>
+#include <vector>
+
+namespace clang {
+namespace format {
+namespace {
+
+using UnexpandedMap = std::map<FormatToken *, std::unique_ptr<UnwrappedLine>>;
+
+class Expansion {
+public:
+  Expansion(TestLexer &Lex, MacroExpander &Macros) : Lex(Lex), Macros(Macros) {}
+
+  TokenList
+  expand(llvm::StringRef Name,
+         const SmallVector<llvm::SmallVector<FormatToken *, 8>, 1> &Args) {
+    auto *ID = Lex.id(Name);
+    auto UnexpandedLine = std::make_unique<UnwrappedLine>();
+    UnexpandedLine->Tokens.push_back(ID);
+    if (!Args.empty()) {
+      UnexpandedLine->Tokens.push_back(Lex.id("("));
+      for (auto I = Args.begin(), E = Args.end(); I != E; ++I) {
+        if (I != Args.begin())
+          UnexpandedLine->Tokens.push_back(Lex.id(","));
+        UnexpandedLine->Tokens.insert(UnexpandedLine->Tokens.end(), I->begin(),
+                                      I->end());
+      }
+      UnexpandedLine->Tokens.push_back(Lex.id(")"));
+    }
+    Unexpanded[ID] = std::move(UnexpandedLine);
+
+    auto Expanded = uneof(Macros.expand(ID, Args));
+    Tokens.append(Expanded.begin(), Expanded.end());
+
+    TokenList UnexpandedTokens;
+    for (const UnwrappedLineNode &Node : Unexpanded[ID]->Tokens) {
+      UnexpandedTokens.push_back(Node.Tok);
+    }
+    return UnexpandedTokens;
+  }
+
+  TokenList expand(llvm::StringRef Name,
+                   const std::vector<std::string> &Args = {}) {
+    return expand(Name, lexArgs(Args));
+  }
+
+  const UnexpandedMap &getUnexpanded() const { return Unexpanded; }
+
+  const TokenList &getTokens() const { return Tokens; }
+
+private:
+  llvm::SmallVector<TokenList, 1>
+  lexArgs(const std::vector<std::string> &Args) {
+    llvm::SmallVector<TokenList, 1> Result;
+    for (const auto &Arg : Args) {
+      Result.push_back(uneof(Lex.lex(Arg)));
+    }
+    return Result;
+  }
+  std::map<FormatToken *, std::unique_ptr<UnwrappedLine>> Unexpanded;
+  llvm::SmallVector<FormatToken *, 8> Tokens;
+  TestLexer &Lex;
+  MacroExpander &Macros;
+};
+
+struct Chunk {
+  Chunk(llvm::ArrayRef<FormatToken *> Tokens)
+      : Tokens(Tokens.begin(), Tokens.end()) {}
+  Chunk(llvm::ArrayRef<UnwrappedLine> Children)
+      : Children(Children.begin(), Children.end()) {}
+  llvm::SmallVector<UnwrappedLineNode, 1> Tokens;
+  llvm::SmallVector<UnwrappedLine, 0> Children;
+};
+
+bool tokenMatches(const FormatToken *Left, const FormatToken *Right) {
+  if (Left->getType() == Right->getType() &&
+      Left->TokenText == Right->TokenText)
+    return true;
+  llvm::dbgs() << Left->TokenText << " != " << Right->TokenText << "\n";
+  return false;
+}
+
+struct Matcher {
+  Matcher(const TokenList &Tokens) : Tokens(Tokens), It(this->Tokens.begin()) {}
+
+  Chunk consume(const TokenList &Tokens) {
+    TokenList Result;
+    for (const FormatToken *Token : Tokens) {
+      assert(tokenMatches(*It, Token));
+      Result.push_back(*It);
+      ++It;
+    }
+    return Chunk(Result);
+  }
+
+  TokenList Tokens;
+  TokenList::iterator It;
+};
+
+UnexpandedMap mergeUnexpanded(const UnexpandedMap &M1,
+                              const UnexpandedMap &M2) {
+  UnexpandedMap Result;
+  for (const auto &KV : M1) {
+    Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+  }
+  for (const auto &KV : M2) {
+    Result[KV.first] = std::make_unique<UnwrappedLine>(*KV.second);
+  }
+  return Result;
+}
+
+class MacroUnexpanderTest : public ::testing::Test {
+public:
+  std::unique_ptr<MacroExpander>
+  create(const std::vector<std::string> &MacroDefinitions) {
+    return std::make_unique<MacroExpander>(MacroDefinitions,
+                                           Lex.SourceMgr.get(), Lex.Style,
+                                           Lex.Allocator, Lex.IdentTable);
+  }
+
+  UnwrappedLine line(llvm::ArrayRef<FormatToken *> Tokens) {
+    UnwrappedLine Result;
+    for (FormatToken *Tok : Tokens) {
+      Result.Tokens.push_back(UnwrappedLineNode(Tok));
+    }
+    return Result;
+  }
+
+  UnwrappedLine line(llvm::StringRef Text) { return line({lex(Text)}); }
+
+  UnwrappedLine line(llvm::ArrayRef<Chunk> Chunks) {
+    UnwrappedLine Result;
+    for (const Chunk &Chunk : Chunks) {
+      Result.Tokens.insert(Result.Tokens.end(), Chunk.Tokens.begin(),
+                           Chunk.Tokens.end());
+      assert(!Result.Tokens.empty());
+      Result.Tokens.back().Children.append(Chunk.Children.begin(),
+                                           Chunk.Children.end());
+    }
+    return Result;
+  }
+
+  TokenList lex(llvm::StringRef Text) { return uneof(Lex.lex(Text)); }
+
+  Chunk tokens(llvm::StringRef Text) { return Chunk(lex(Text)); }
+
+  Chunk children(llvm::ArrayRef<UnwrappedLine> Children) {
+    return Chunk(Children);
+  }
+
+  TestLexer Lex;
+};
+
+bool matchesTokens(const UnwrappedLine &L1, const UnwrappedLine &L2) {
+  if (L1.Tokens.size() != L2.Tokens.size())
+    return false;
+  for (auto L1It = L1.Tokens.begin(), L2It = L2.Tokens.begin();
+       L1It != L1.Tokens.end(); ++L1It, ++L2It) {
+    if (L1It->Tok != L2It->Tok)
+      return false;
+    if (L1It->Children.size() != L2It->Children.size())
+      return false;
+    for (auto L1ChildIt = L1It->Children.begin(),
+              L2ChildIt = L2It->Children.begin();
+         L1ChildIt != L1It->Children.end(); ++L1ChildIt, ++L2ChildIt) {
+      if (!matchesTokens(*L1ChildIt, *L2ChildIt))
+        return false;
+    }
+  }
+  return true;
+}
+MATCHER_P(matchesLine, line, "") { return matchesTokens(arg, line); }
+
+TEST_F(MacroUnexpanderTest, Identifier) {
+  auto Macros = create({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X");
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Unexp.addLine(line(Exp.getTokens()));
+  EXPECT_TRUE(Unexp.finished());
+  UnwrappedLine Result = Unexp.getResult();
+  Matcher U(Call);
+  EXPECT_THAT(Unexp.getResult(), matchesLine(line(U.consume(lex("X")))));
+}
+
+TEST_F(MacroUnexpanderTest, NestedLineWithinCall) {
+  auto Macros = create({"C(a)=class C { a; };"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("C", {"void f()"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line(E.consume(lex("class C {"))));
+  Unexp.addLine(line(E.consume(lex("void f();"))));
+  Unexp.addLine(line(E.consume(lex("};"))));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  EXPECT_THAT(Unexp.getResult(),
+              matchesLine(line(U.consume(lex("C(void f())")))));
+}
+
+TEST_F(MacroUnexpanderTest, StatementSequence) {
+  auto Macros = create({"SEMI=;"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call1 = Exp.expand("SEMI");
+  TokenList Call2 = Exp.expand("SEMI");
+  TokenList Call3 = Exp.expand("SEMI");
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line(E.consume(lex(";"))));
+  Unexp.addLine(line(E.consume(lex(";"))));
+  Unexp.addLine(line(E.consume(lex(";"))));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U1(Call1);
+  Matcher U2(Call2);
+  Matcher U3(Call3);
+  EXPECT_THAT(
+      Unexp.getResult(),
+      matchesLine(line(
+          {U1.consume(lex("SEMI")),
+           children({line({U2.consume(lex("SEMI")),
+                           children({line(U3.consume(lex("SEMI")))})})})})));
+}
+
+TEST_F(MacroUnexpanderTest, NestedBlock) {
+  auto Macros = create({"ID(x)=x"});
+  // Test: ID({ ID(a *b); })
+  // 1. expand ID(a *b) -> a *b
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("ID", {"a *b"});
+  // 2. expand ID({ a *b; })
+  TokenList Arg;
+  Arg.push_back(Lex.id("{"));
+  Arg.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+  Arg.push_back(Lex.id(";"));
+  Arg.push_back(Lex.id("}"));
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("ID", {Arg});
+
+  // Consume as-if formatted:
+  // {
+  //   a *b;
+  // }
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroUnexpander Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens());
+  Unexp.addLine(line(E.consume(lex("{"))));
+  Unexp.addLine(line(E.consume(lex("a *b;"))));
+  Unexp.addLine(line(E.consume(lex("}"))));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID({
+  //   ID(a *b);
+  // })
+  Matcher U1(Call1);
+  Matcher U2(Call2);
+  auto Chunk2Start = U2.consume(lex("ID("));
+  auto Chunk2LBrace = U2.consume(lex("{"));
+  U2.consume(lex("a *b"));
+  auto Chunk2Mid = U2.consume(lex(";"));
+  auto Chunk2RBrace = U2.consume(lex("}"));
+  auto Chunk2End = U2.consume(lex(")"));
+  auto Chunk1 = U1.consume(lex("ID(a *b)"));
+
+  auto Expected = line({Chunk2Start,
+                        children({
+                            line(Chunk2LBrace),
+                            line({Chunk1, Chunk2Mid}),
+                            line(Chunk2RBrace),
+                        }),
+                        Chunk2End});
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, NestedChildBlocks) {
+  auto Macros = create({"ID(x)=x", "CALL(x)=f([] { x })"});
+  // Test: ID(CALL(CALL(return a * b;)))
+  // 1. expand CALL(return a * b;)
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("CALL", {"return a * b;"});
+  // 2. expand CALL(f([] { return a * b; }))
+  Expansion Exp2(Lex, *Macros);
+  TokenList Call2 = Exp2.expand("CALL", {Exp1.getTokens()});
+  // 3. expand ID({ f([] { f([] { return a * b; }) }) })
+  TokenList Arg3;
+  Arg3.push_back(Lex.id("{"));
+  Arg3.append(Exp2.getTokens().begin(), Exp2.getTokens().end());
+  Arg3.push_back(Lex.id("}"));
+  Expansion Exp3(Lex, *Macros);
+  TokenList Call3 = Exp3.expand("ID", {Arg3});
+
+  // Consume as-if formatted in three unwrapped lines:
+  // 0: {
+  // 1:   f([] {
+  //        f([] {
+  //          return a * b;
+  //        })
+  //      })
+  // 2: }
+  UnexpandedMap Unexpanded = mergeUnexpanded(
+      Exp1.getUnexpanded(),
+      mergeUnexpanded(Exp2.getUnexpanded(), Exp3.getUnexpanded()));
+  MacroUnexpander Unexp(0, Unexpanded);
+  Matcher E(Exp3.getTokens());
+  Unexp.addLine(line(E.consume(lex("{"))));
+  Unexp.addLine(
+      line({E.consume(lex("f([] {")),
+            children({line({E.consume(lex("f([] {")),
+                            children({line(E.consume(lex("return a * b;")))}),
+                            E.consume(lex("})"))})}),
+            E.consume(lex("})"))}));
+  Unexp.addLine(line(E.consume(lex("}"))));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID(
+  //   {
+  //   CALL(CALL(return a * b;))
+  //   }
+  // )
+  Matcher U1(Call1);
+  Matcher U2(Call2);
+  Matcher U3(Call3);
+  auto Chunk3Start = U3.consume(lex("ID("));
+  auto Chunk3LBrace = U3.consume(lex("{"));
+  U3.consume(lex("f([] { f([] { return a * b; }) })"));
+  auto Chunk3RBrace = U3.consume(lex("}"));
+  auto Chunk3End = U3.consume(lex(")"));
+  auto Chunk2Start = U2.consume(lex("CALL("));
+  U2.consume(lex("f([] { return a * b; })"));
+  auto Chunk2End = U2.consume(lex(")"));
+  auto Chunk1 = U1.consume(lex("CALL(return a * b;)"));
+
+  auto Expected = line({
+      Chunk3Start,
+      children({
+          line(Chunk3LBrace),
+          line({
+              Chunk2Start,
+              Chunk1,
+              Chunk2End,
+          }),
+          line(Chunk3RBrace),
+      }),
+      Chunk3End,
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, NestedChildrenMultipleArguments) {
+  auto Macros = create({"CALL(a, b)=f([] { a; b; })"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("int a"), "int b"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line({
+      E.consume(lex("f([] {")),
+      children({
+          line(E.consume(lex("int a;"))),
+          line(E.consume(lex("int b;"))),
+      }),
+      E.consume(lex("})")),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line(U.consume(lex("CALL(int a, int b)")));
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, ReverseOrderArgumentsInExpansion) {
+  auto Macros = create({"CALL(a, b)=b + a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("x"), "y"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line(E.consume(lex("y + x"))));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line(U.consume(lex("CALL(x, y)")));
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, MultipleToplevelUnwrappedLines) {
+  auto Macros = create({"ID(a, b)=a b"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("x; x"), "y"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line(E.consume(lex("x;"))));
+  Unexp.addLine(line(E.consume(lex("x y"))));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line({
+      U.consume(lex("ID(")),
+      children({
+          line(U.consume(lex("x;"))),
+          line(U.consume(lex("x"))),
+      }),
+      U.consume(lex(", y)")),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, NestedCallsMultipleLines) {
+  auto Macros = create({"ID(x)=x"});
+  // Test: ID({ID(a * b);})
+  // 1. expand ID(a * b)
+  Expansion Exp1(Lex, *Macros);
+  TokenList Call1 = Exp1.expand("ID", {"a * b"});
+  // 2. expand ID({ a * b; })
+  Expansion Exp2(Lex, *Macros);
+  TokenList Arg2;
+  Arg2.push_back(Lex.id("{"));
+  Arg2.append(Exp1.getTokens().begin(), Exp1.getTokens().end());
+  Arg2.push_back(Lex.id(";"));
+  Arg2.push_back(Lex.id("}"));
+  TokenList Call2 = Exp2.expand("ID", {Arg2});
+
+  // Consume as-if formatted in three unwrapped lines:
+  // 0: {
+  // 1:   a * b;
+  // 2: }
+  UnexpandedMap Unexpanded =
+      mergeUnexpanded(Exp1.getUnexpanded(), Exp2.getUnexpanded());
+  MacroUnexpander Unexp(0, Unexpanded);
+  Matcher E(Exp2.getTokens());
+  Unexp.addLine(line(E.consume(lex("{"))));
+  Unexp.addLine(line(E.consume(lex("a * b;"))));
+  Unexp.addLine(line(E.consume(lex("}"))));
+  EXPECT_TRUE(Unexp.finished());
+
+  // Expect lines:
+  // ID(
+  //     {
+  //     ID(a * b);
+  //     }
+  // )
+  Matcher U1(Call1);
+  Matcher U2(Call2);
+  auto Chunk2Start = U2.consume(lex("ID("));
+  auto Chunk2LBrace = U2.consume(lex("{"));
+  U2.consume(lex("a * b"));
+  auto Chunk2Semi = U2.consume(lex(";"));
+  auto Chunk2RBrace = U2.consume(lex("}"));
+  auto Chunk2End = U2.consume(lex(")"));
+  auto Chunk1 = U1.consume(lex("ID(a * b)"));
+
+  auto Expected = line({
+      Chunk2Start,
+      children({
+          line({Chunk2LBrace}),
+          line({Chunk1, Chunk2Semi}),
+          line({Chunk2RBrace}),
+      }),
+      Chunk2End,
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, ParentOutsideMacroCall) {
+  auto Macros = create({"ID(a)=a"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("ID", {std::string("x; y; z;")});
+
+  auto Prefix = tokens("int a = []() {");
+  auto Postfix = tokens("}();");
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line({
+      Prefix,
+      children({
+          line(E.consume(lex("x;"))),
+          line(E.consume(lex("y;"))),
+          line(E.consume(lex("z;"))),
+      }),
+      Postfix,
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line({
+      Prefix,
+      children({
+          line({
+              U.consume(lex("ID(")),
+              children({
+                  line(U.consume(lex("x;"))),
+                  line(U.consume(lex("y;"))),
+                  line(U.consume(lex("z;"))),
+              }),
+              U.consume(lex(")")),
+          }),
+      }),
+      Postfix,
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, UnusedMacroArguments) {
+  auto Macros = create({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X", {"a", "b", "c"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Unexp.addLine(line(Exp.getTokens()));
+  EXPECT_TRUE(Unexp.finished());
+  UnwrappedLine Result = Unexp.getResult();
+  Matcher U(Call);
+  EXPECT_THAT(Unexp.getResult(),
+              matchesLine(line(U.consume(lex("X(a, b, c)")))));
+}
+
+TEST_F(MacroUnexpanderTest, UnusedEmptyMacroArgument) {
+  auto Macros = create({"X=x"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("X", {std::string("")});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  auto Semi = tokens(";");
+  Unexp.addLine(line({E.consume(lex("x")), Semi}));
+  EXPECT_TRUE(Unexp.finished());
+  UnwrappedLine Result = Unexp.getResult();
+  Matcher U(Call);
+  EXPECT_THAT(Unexp.getResult(),
+              matchesLine(line({U.consume(lex("X()")), Semi})));
+}
+
+TEST_F(MacroUnexpanderTest, ChildrenSplitAcrossArguments) {
+  auto Macros = create({"CALL(a, b)=f([]() a b)"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("{ a;"), "b; }"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  Unexp.addLine(line({
+      E.consume(lex("f([]() {")),
+      children({
+          line(E.consume(lex("a;"))),
+          line(E.consume(lex("b;"))),
+      }),
+      E.consume(lex("})")),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line({
+      U.consume(lex("CALL({")),
+      children(line(U.consume(lex("a;")))),
+      U.consume(lex(", b; })")),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, ChildrenAfterMacroCall) {
+  auto Macros = create({"CALL(a, b)=f([]() a b"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("CALL", {std::string("{ a"), "b"});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  auto Semi = tokens(";");
+  auto SecondLine = tokens("c d;");
+  auto ThirdLine = tokens("e f;");
+  auto Postfix = tokens("})");
+  Unexp.addLine(line({
+      E.consume(lex("f([]() {")),
+      children({
+          line({E.consume(lex("a b")), Semi}),
+          line(SecondLine),
+          line(ThirdLine),
+      }),
+      Postfix,
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line({
+      U.consume(lex("CALL({")),
+      children(line(U.consume(lex("a")))),
+      U.consume(lex(", b)")),
+      Semi,
+      children(line({
+          SecondLine,
+          children(line({
+              ThirdLine,
+              Postfix,
+          })),
+      })),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+TEST_F(MacroUnexpanderTest, InvalidCodeSplittingBracesAcrossArgs) {
+  auto Macros = create({"M(a, b)=(a) (b)"});
+  Expansion Exp(Lex, *Macros);
+  TokenList Call = Exp.expand("M", {std::string("{"), "x", ""});
+
+  MacroUnexpander Unexp(0, Exp.getUnexpanded());
+  Matcher E(Exp.getTokens());
+  auto Prefix = tokens("({");
+  Unexp.addLine(line({
+      Prefix,
+      children({
+          line({
+              E.consume(lex("({")),
+              children({line(E.consume(lex(")(x)")))}),
+          }),
+      }),
+  }));
+  EXPECT_TRUE(Unexp.finished());
+  Matcher U(Call);
+  auto Expected = line({
+      Prefix,
+      children({line(U.consume(lex("M({,x,)")))}),
+  });
+  EXPECT_THAT(Unexp.getResult(), matchesLine(Expected));
+}
+
+} // namespace
+} // namespace format
+} // namespace clang
Index: clang/unittests/Format/CMakeLists.txt
===================================================================
--- clang/unittests/Format/CMakeLists.txt
+++ clang/unittests/Format/CMakeLists.txt
@@ -16,6 +16,7 @@
   FormatTestTableGen.cpp
   FormatTestTextProto.cpp
   MacroExpanderTest.cpp
+  MacroUnexpanderTest.cpp
   NamespaceEndCommentsFixerTest.cpp
   SortImportsTestJS.cpp
   SortImportsTestJava.cpp
Index: clang/lib/Format/Macros.h
===================================================================
--- clang/lib/Format/Macros.h
+++ clang/lib/Format/Macros.h
@@ -37,26 +37,22 @@
 #ifndef CLANG_LIB_FORMAT_MACROS_H
 #define CLANG_LIB_FORMAT_MACROS_H
 
+#include <list>
+#include <map>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
-#include "Encoding.h"
 #include "FormatToken.h"
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 
-namespace llvm {
-class MemoryBuffer;
-} // namespace llvm
-
 namespace clang {
-class IdentifierTable;
-class SourceManager;
-
 namespace format {
-struct FormatStyle;
+
+struct UnwrappedLine;
+struct UnwrappedLineNode;
 
 /// Takes a set of macro definitions as strings and allows expanding calls to
 /// those macros.
@@ -135,6 +131,197 @@
   llvm::StringMap<Definition> Definitions;
 };
 
+/// Matches formatted lines that were created by macro expansion to a format
+/// for the original macro call.
+///
+/// Given a mapping from the macro name identifier token in the macro call
+/// to the tokens of the macro call, for example:
+/// CLASSA -> CLASSA({public: void x();})
+///
+/// When getting the formatted lines of the expansion via the \c addLine method:
+/// -> class A {
+/// -> public:
+/// ->   void x();
+/// -> };
+///
+/// Creates the unwrapped lines containing the macro call tokens so that
+/// the macro call tokens fit the semantic structure of the expanded formatted
+/// lines:
+/// -> CLASSA({
+/// -> public:
+/// ->   void x();
+/// -> })
+class MacroUnexpander {
+public:
+  /// Create an Unexpander whose resulting unexpanded line will start at
+  /// \p Level, unexpanding using the map from name identifier token
+  /// to the corresponding tokens of the original macro call.
+  MacroUnexpander(unsigned Level,
+                  const std::map<FormatToken *, std::unique_ptr<UnwrappedLine>>
+                      &Unexpanded);
+
+  /// For the given \p Line, match all occurences of tokens expanded from a
+  /// macro and replace them with the original macro call in \c getResult().
+  void addLine(const UnwrappedLine &Line);
+
+  /// Check whether at the current state there is no open macro expansion
+  /// that needs to be processed to finish an macro call.
+  bool finished() { return State == InProgress && Unexpanded.empty(); }
+
+  /// Retrieve the formatted \c UnwrappedLine containing the orginal
+  /// macro calls, formatted according to the expanded token stream received
+  /// via \c addLine().
+  /// Generally, this line tries to have the same structure as the expanded,
+  /// formatted unwrapped lines handed in via \c addLine():
+  /// If a token in a macro argument is a child of a token in the expansion,
+  /// the parent will be the corresponding token in the macro call.
+  /// For example:
+  /// #define C(a, b) class C { a b
+  /// C(int x;, int y;) would expand to class C { int x; int y; where in a
+  /// formatted line "int x;" and "int y;" would both be new separate lines (at
+  /// different indentation levels). In the result, "int x;" will be a child of
+  /// the opening parenthesis in "C(" and "int y;" will be a child of the ","
+  /// token.
+  UnwrappedLine getResult();
+
+private:
+  void forEachToken(
+      const UnwrappedLine &Line,
+      const std::function<void(FormatToken *, FormatToken *, bool)> &Call,
+      FormatToken *Parent = nullptr);
+  void add(FormatToken *Token, FormatToken *OriginalParent, bool First);
+  void prepareParent(FormatToken *OriginalParent, bool First);
+  FormatToken *getParentInOutput(FormatToken *Parent);
+  void unexpand(FormatToken *Token);
+  void startUnexpansion(FormatToken *Token);
+  bool continueUnexpansionUntil(FormatToken *Token);
+  void endUnexpansion(FormatToken *Token);
+  bool processNextUnexpanded();
+  void finalize();
+
+  struct Line;
+
+  void pushToken(FormatToken *Token, Line *L = nullptr);
+  UnwrappedLine createUnwrappedLine(const Line &Line, int Level);
+  void debug(const Line &Line, int Level);
+  Line &parentLine();
+  Line *currentLine();
+
+  enum UnexpanderState {
+    Start,      // No macro expansion was found in the input yet.
+    InProgress, // During a macro unexpansion.
+    Finalized,  // Past macro unexpansion, the result is finalized.
+  };
+  UnexpanderState State = Start;
+
+  // Node in which we build up the resulting unwrapped line; this type is
+  // analogous to UnwrappedLineNode.
+  struct LineNode {
+    LineNode() = default;
+    LineNode(FormatToken *Tok) : Tok(Tok) {}
+    FormatToken *Tok = nullptr;
+    llvm::SmallVector<std::unique_ptr<Line>, 4> Children;
+  };
+
+  // Line in which we build up the resulting unwrapped line.
+  // FIXME: Investigate changing UnwrappedLine to a pointer type and using it
+  // instead of rolling our own type.
+  struct Line {
+    llvm::SmallVector<std::unique_ptr<LineNode>, 4> Tokens;
+  };
+
+  // The line in which we collect the resulting unexpanded output.
+  // To reduce special cases in the algorithm, the first level of the line
+  // contains a single null token that has the unexpanded incoming
+  // lines as children.
+  // In the end, we stich the lines together so that each subsequent line
+  // is a child of the last token of the previous line. This is necessary
+  // in order to format the overall expression as a single logical line -
+  // if we created separate lines, we'd format them with their own top-level
+  // indent depending on the semantic structure, which is not desired.
+  Line Output;
+
+  // Stack of currently "open" lines, where each line's predecessor's last
+  // token is the parent token for that line.
+  llvm::SmallVector<Line *, 4> Lines;
+
+  // Maps from the original token to the token that takes its place in the
+  // unexpanded token stream in terms of parent-child relationships.
+  // Note that it might take multiple steps to arrive at the correct
+  // parent in the output.
+  // Given: C(a, b) []() { a; b; }
+  // And a call: C(f(), g())
+  // The structure in the incoming formatted unwrapped line will be:
+  // []() {
+  //      |- f();
+  //      \- g();
+  // }
+  // with f and g being children of the opening brace.
+  // In the unexpanded call:
+  // C(f(), g())
+  //  \- f()
+  //      \- g()
+  // We want f to be a child of the opening parenthesis and g to be a child
+  // of the comma token in the macro call.
+  // Thus, we map
+  // { -> (
+  // and add
+  // ( -> ,
+  // once we're past the comma in the unexpansion.
+  llvm::DenseMap<FormatToken *, FormatToken *> TokenToParentInOutput;
+
+  // Keeps track of a single expansion while we're unexpanding tokens it
+  // generated.
+  struct Expansion {
+    // The identifier token of the macro call.
+    FormatToken *ID;
+    // Our current position in the unexpansion.
+    std::list<UnwrappedLineNode>::iterator I;
+    // The end of the unexpanded token sequence.
+    std::list<UnwrappedLineNode>::iterator End;
+  };
+
+  // Stack of unexpanded macro calls for which we're in the middle
+  // of an expansion.
+  llvm::SmallVector<Expansion, 2> Unexpanded;
+
+  struct MacroCallState {
+    Line *Line;
+    FormatToken *RedirectParentFrom;
+    FormatToken *RedirectParentTo;
+  };
+
+  // Keeps track of the lines into which the opening brace/parenthesis &
+  // argument separating commas for each level in the macro call go in order to
+  // put the corresponding closing brace/parenthesis into the same line in the
+  // output and keep track of which parents in the expanded token stream map to
+  // which tokens in the unexpanded stream.
+  // When an opening brace/parenthesis has children, we want the structure of
+  // the output line to be:
+  // |- MACRO
+  // |- (
+  // |  \- <argument>
+  // |- ,
+  // |  \- <argument>
+  // \- )
+  llvm::SmallVector<MacroCallState, 4> MacroCallStructure;
+
+  // For each token, the previous token in the resulting unexpanded token
+  // stream.
+  llvm::DenseMap<FormatToken *, LineNode *> TokenToPrevious;
+
+  // The previous token's node we put into the resulting unexpanded token
+  // stream.
+  LineNode *PreviousNode = nullptr;
+
+  // Level the generated UnwrappedLine will be at.
+  const unsigned Level;
+
+  // Maps from identifier of the macro call to an unwrapped line containing
+  // all tokens of the macro call.
+  const std::map<FormatToken *, std::unique_ptr<UnwrappedLine>> &IdToUnexpanded;
+};
+
 } // namespace format
 } // namespace clang
 
Index: clang/lib/Format/MacroUnexpander.cpp
===================================================================
--- /dev/null
+++ clang/lib/Format/MacroUnexpander.cpp
@@ -0,0 +1,481 @@
+//===--- MacroUnexpander.cpp - Format C++ code ------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of MacroUnexpander, which fits an
+/// unexpanded macro call to a parsed set of UnwrappedLines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "Macros.h"
+
+#include "UnwrappedLineParser.h"
+#include "llvm/Support/Debug.h"
+
+#define DEBUG_TYPE "format-unexpand"
+
+namespace clang {
+namespace format {
+
+MacroUnexpander::MacroUnexpander(
+    unsigned Level,
+    const std::map<FormatToken *, std::unique_ptr<UnwrappedLine>> &Unexpanded)
+    : Level(Level), IdToUnexpanded(Unexpanded) {
+  Output.Tokens.push_back(std::make_unique<LineNode>());
+  Lines.push_back(&Output);
+}
+
+void MacroUnexpander::addLine(const UnwrappedLine &Line) {
+  assert(State != Finalized);
+  LLVM_DEBUG(llvm::dbgs() << "Unex: new line...\n");
+  forEachToken(Line, [&](FormatToken *Token, FormatToken *Parent, bool First) {
+    add(Token, Parent, First);
+  });
+}
+
+UnwrappedLine MacroUnexpander::getResult() {
+  finalize();
+  UnwrappedLine Final =
+      createUnwrappedLine(*Output.Tokens.front()->Children.front(), Level);
+  assert(!Final.Tokens.empty());
+  return Final;
+}
+
+// Call \p Call for each token in the unwrapped line given, passing
+// the token, its parent and whether it is the first token in the line.
+void MacroUnexpander::forEachToken(
+    const UnwrappedLine &Line,
+    const std::function<void(FormatToken *, FormatToken *, bool)> &Call,
+    FormatToken *Parent) {
+  bool First = true;
+  for (const auto &N : Line.Tokens) {
+    Call(N.Tok, Parent, First);
+    First = false;
+    for (const auto &Child : N.Children) {
+      forEachToken(Child, Call, N.Tok);
+    }
+  }
+}
+
+// Unexand \p Token, given its parent \p OriginalParent in the incoming
+// unwrapped line. \p First specifies whether it is the first token in a given
+// unwrapped line.
+void MacroUnexpander::add(FormatToken *Token, FormatToken *OriginalParent,
+                          bool First) {
+  LLVM_DEBUG(
+      llvm::dbgs() << "Unex: Token: " << Token->TokenText << ", Parent: "
+                   << (OriginalParent ? OriginalParent->TokenText : "<null>")
+                   << ", First: " << First << "\n");
+  // In order to be able to find the correct parent in the unexpanded token
+  // stream, we need to continue the unexpansion until we find the right token
+  // if it is part of the unexpanded token stream.
+  // Note that hidden tokens can be part of the unexpanded stream in nested
+  // macro calls.
+  // For example, given: BRACED(a) {a}
+  // And the call: BRACED(BRACED(x))
+  // The outer macro call will be BRACED({a}), and the hidden tokens '{' and
+  // '}' can be found in the unexpanded macro stream of that level.
+  if (!Unexpanded.empty() && Token->MacroCtx &&
+      (Token->MacroCtx->Role != MR_Hidden ||
+       Unexpanded.size() != Token->MacroCtx->ExpandedFrom.size())) {
+    if (continueUnexpansionUntil(Token))
+      First = true;
+  }
+
+  prepareParent(OriginalParent, First);
+
+  if (!Token->MacroCtx) {
+    // If this token was not generated by a macro call, we add it to the current
+    // line.
+    pushToken(Token);
+  } else {
+    // Otherwise add the unexpanded equivalent of the token.
+    unexpand(Token);
+  }
+}
+
+// Ensures that:
+// Lines.back() is the line that has \p OriginalParent or its unexpanded
+// replacement token as a parent (when possible) - that is, the last token in
+// \c Lines[Lines.size()-2] is the parent of Lines.back() in the
+// unexpanded unwrapped line.
+void MacroUnexpander::prepareParent(FormatToken *OriginalParent, bool First) {
+  // We want to find the parent in the new unwrapped line, where the original
+  // parent might have been replaced by an unxpansion.
+  FormatToken *Parent = getParentInOutput(OriginalParent);
+  LLVM_DEBUG(llvm::dbgs() << "Unex: New parent: "
+                          << (Parent ? Parent->TokenText : "<null>") << "\n");
+
+  FormatToken *OpenMacroParent = nullptr;
+  if (!MacroCallStructure.empty()) {
+    // Inside a macro expansion, it is possible to lose track of the correct
+    // parent - either because it is already popped, for example because it was
+    // in a different macro argument (e.g. M({, })), or when we work on invalid
+    // code.
+    // Thus, we use the innermost macro call's parent as the parent at which
+    // we stop; this allows us to stay within the macro expansion and keeps
+    // any problems confined to the extent of the macro call.
+    OpenMacroParent =
+        getParentInOutput(MacroCallStructure.back().RedirectParentTo);
+  }
+  if (First || (!Lines.back()->Tokens.empty() &&
+                Parent == Lines.back()->Tokens.back()->Tok)) {
+    // If we are at the first token in a new line, we want to also
+    // create a new line in the resulting unexpanded unwrapped line.
+    while (Lines.back()->Tokens.empty() ||
+           (Parent != Lines.back()->Tokens.back()->Tok &&
+            Lines.back()->Tokens.back()->Tok != OpenMacroParent)) {
+      Lines.pop_back();
+      assert(!Lines.empty());
+    }
+    assert(!Lines.empty());
+    Lines.back()->Tokens.back()->Children.push_back(std::make_unique<Line>());
+    Lines.push_back(&*Lines.back()->Tokens.back()->Children.back());
+  } else if (parentLine().Tokens.back()->Tok != Parent) {
+    // If we're not the first token in a new line, pop lines until we find
+    // the child of \c Parent in the stack.
+    while (Parent != parentLine().Tokens.back()->Tok &&
+           parentLine().Tokens.back()->Tok &&
+           parentLine().Tokens.back()->Tok != OpenMacroParent) {
+      Lines.pop_back();
+      assert(!Lines.empty());
+    }
+  }
+  assert(!Lines.empty());
+}
+
+// For a given \p Parent in the incoming expanded token stream, find the
+// corresponding parent in the output.
+FormatToken *MacroUnexpander::getParentInOutput(FormatToken *Parent) {
+  auto I = TokenToParentInOutput.find(Parent);
+  if (I == TokenToParentInOutput.end())
+    return Parent;
+  for (; I != TokenToParentInOutput.end();
+       I = TokenToParentInOutput.find(Parent)) {
+    Parent = I->second;
+  }
+  // If we use a different token than the parent in the expanded token stream
+  // as parent, mark it as a special parent, so the formatting code knows it
+  // needs to have its children formatted.
+  Parent->MacroParent = true;
+  return Parent;
+}
+
+// Unexpand a \p Token that was expanded from a macro call.
+void MacroUnexpander::unexpand(FormatToken *Token) {
+  assert(Token->MacroCtx);
+  // A single token can be the only result of a macro call:
+  // Given: ID(x, y) ;
+  // And the call: ID(<some>, <tokens>)
+  // ';' in the expanded stream will unexpand all of ID(<some>, <tokens>).
+  if (Token->MacroCtx->StartOfExpansion) {
+    startUnexpansion(Token);
+    // If the order of tokens in the expanded token stream is not the
+    // same as the order of tokens in the unexpanded stream, we need
+    // to unexpand tokens that arrive later in the stream.
+    if (Token->MacroCtx->Role != MR_Hidden) {
+      continueUnexpansionUntil(Token);
+    }
+  }
+  assert(!Unexpanded.empty());
+  if (Unexpanded.back().I != Unexpanded.back().End) {
+    assert(Unexpanded.size() == Token->MacroCtx->ExpandedFrom.size());
+    if (Token->MacroCtx->Role != MR_Hidden) {
+      // The current token in the unexpanded token stream must be the token
+      // we're looking for - we either arrive here after startUnexpansion,
+      // which initiates the stream to the first token, or after
+      // continueExpansionUntil skipped until the expected token in the
+      // unexpanded stream at the start of add(...).
+      assert(Unexpanded.back().I->Tok == Token);
+      processNextUnexpanded();
+    } else if (!currentLine()->Tokens.empty()) {
+      // FIXME:assert(!currentLine()->Tokens.empty());
+      // Map all hidden tokens to the last visible token in the output.
+      // If the hidden token is a parent, we'll use the last visible
+      // token as the parent of the hidden token's children.
+      TokenToParentInOutput[Token] = currentLine()->Tokens.back()->Tok;
+    } else {
+      for (auto I = Lines.rbegin(), E = Lines.rend(); I != E; ++I) {
+        if (!(*I)->Tokens.empty()) {
+          TokenToParentInOutput[Token] = (*I)->Tokens.back()->Tok;
+
+          break;
+        }
+      }
+      llvm::dbgs() << "CURRENT LINE TOKENS EMPTY!!\n";
+    }
+  }
+  if (Token->MacroCtx->EndOfExpansion)
+    endUnexpansion(Token);
+}
+
+// Given a \p Token that starts an expansion, unexpand the beginning of the
+// macro call.
+// For example, given: ID(x) x
+// And the call: ID(int a)
+// Unexpands: ID(
+void MacroUnexpander::startUnexpansion(FormatToken *Token) {
+  assert(Token->MacroCtx);
+  assert(!Token->MacroCtx->ExpandedFrom.empty());
+  assert(Unexpanded.size() <= Token->MacroCtx->ExpandedFrom.size());
+#ifndef NDEBUG
+  // Check that the token's unexpansion stack matches our current unexpansion
+  // stack.
+  for (size_t I = 0; I < Unexpanded.size(); ++I) {
+    assert(Unexpanded[I].ID ==
+           Token->MacroCtx
+               ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I]);
+  }
+#endif
+  // Start unexpansion for all calls for which this token is the first token
+  // generated by the call.
+  for (size_t I = Unexpanded.size(); I < Token->MacroCtx->ExpandedFrom.size();
+       ++I) {
+    FormatToken *ID =
+        Token->MacroCtx
+            ->ExpandedFrom[Token->MacroCtx->ExpandedFrom.size() - 1 - I];
+    // We found a macro call to be unexpanded; the next time our unexpansion
+    // stack is empty we know we finished an unexpansion.
+    State = InProgress;
+    // Put the unexpanded macro call's token into our unexpansion stack.
+    auto IU = IdToUnexpanded.find(ID);
+    assert(IU != IdToUnexpanded.end());
+    Unexpanded.push_back(
+        {ID, IU->second->Tokens.begin(), IU->second->Tokens.end()});
+    // Process the macro call's identifier.
+    processNextUnexpanded();
+    if (Unexpanded.back().I == Unexpanded.back().End)
+      continue;
+    assert(Unexpanded.back().I->Tok->is(tok::l_paren));
+    // Process the optional opening parenthesis.
+    processNextUnexpanded();
+  }
+}
+
+// Add all tokens in the unexpansion stream to the output until we find the
+// given \p Token.
+bool MacroUnexpander::continueUnexpansionUntil(FormatToken *Token) {
+  assert(!Unexpanded.empty());
+  bool PassedMacroComma = false;
+  // FIXME: If Token was already expanded earlier, due to
+  // a change in order, we will not find it, but need to
+  // skip it.
+  for (; Unexpanded.back().I != Unexpanded.back().End &&
+         Unexpanded.back().I->Tok != Token;) {
+    PassedMacroComma = processNextUnexpanded() || PassedMacroComma;
+  }
+  assert(Unexpanded.back().I == Unexpanded.back().End ||
+         Unexpanded.back().I->Tok == Token);
+  return PassedMacroComma;
+}
+
+// End all unexpansions for which \p Token is the final token.
+void MacroUnexpander::endUnexpansion(FormatToken *Token) {
+  assert(!Token->MacroCtx ||
+         (Unexpanded.size() >= Token->MacroCtx->EndOfExpansion));
+  if (!Token->MacroCtx)
+    return;
+  for (size_t I = 0; I < Token->MacroCtx->EndOfExpansion; ++I) {
+#ifndef NDEBUG
+    // Check all remaining tokens but the final closing parenthesis and optional
+    // trailing comment were already expanded at an inner expansion level.
+    for (auto T = Unexpanded.back().I; T != Unexpanded.back().End; ++T) {
+      FormatToken *Token = T->Tok;
+      bool ClosingParen = (std::next(T) == Unexpanded.back().End ||
+                           std::next(T)->Tok->isTrailingComment()) &&
+                          !Token->MacroCtx && Token->is(tok::r_paren);
+      bool TrailingComment = Token->isTrailingComment();
+      bool PreviousLevel =
+          Token->MacroCtx &&
+          (Unexpanded.size() < Token->MacroCtx->ExpandedFrom.size());
+      if (!ClosingParen && !TrailingComment && !PreviousLevel) {
+        llvm::dbgs() << "At token: " << Token->TokenText << "\n";
+      }
+      // In addition to the following cases, we can also run into this
+      // when a macro call had more arguments than expected; in that case,
+      // the comma and the remaining tokens in the macro call will potentially
+      // end up in the line when we finishe the expansion.
+      // FIXME: Add the information which arguments are unused, and assert
+      // one of the cases below plus unexpanded macro argument tokens.
+      // assert(ClosingParen || TrailingComment || PreviousLevel);
+    }
+#endif
+    // Expand the closing parenthesis, if it exists, including an optional
+    // trailing comment.
+    for (auto T = Unexpanded.back().I; T != Unexpanded.back().End; ++T) {
+      processNextUnexpanded();
+    }
+    Unexpanded.pop_back();
+  }
+}
+
+// If visible, add the next token of the unexpanded token sequence to the
+// output.
+bool MacroUnexpander::processNextUnexpanded() {
+  FormatToken *Token = Unexpanded.back().I->Tok;
+  // llvm::dbgs() << "ProcessNext: " << Token->TokenText << "\n";
+  ++Unexpanded.back().I;
+  if (Token->MacroCtx) {
+    // Skip tokens that are not part of the macro call.
+    if (Token->MacroCtx->Role == MR_Hidden) {
+      return false;
+    }
+    // Skip tokens we already expanded during an inner unexpansion.
+    // For example, given: ID(x) {x}
+    // And the call: ID(ID(f))
+    // We get two unexpansions:
+    // ID(f) -> {f}
+    // ID({f}) -> {{f}}
+    // We unexpand f during the first unexpansion, and skip it during the second
+    // unexpansion.
+    if (Unexpanded.size() < Token->MacroCtx->ExpandedFrom.size()) {
+      return false;
+    }
+  }
+  // Put the parentheses and commas of a macro call into the same line;
+  // if the arguments produce new unwrapped lines, they will become children
+  // of the corresponding opening parenthesis or comma tokens in the
+  // unexpanded call.
+  if (!Token->MacroCtx && Token->isOneOf(tok::comma, tok::r_paren) &&
+      !MacroCallStructure.empty()) {
+    if (Token->is(tok::comma)) {
+      TokenToParentInOutput
+          [MacroCallStructure.back().Line->Tokens.back()->Tok] = Token;
+      Token->MacroParent = true;
+      pushToken(Token, MacroCallStructure.back().Line);
+      prepareParent(Token, /*First=*/true);
+      return true;
+    }
+    assert(Token->is(tok::r_paren));
+    pushToken(Token, MacroCallStructure.back().Line);
+    TokenToParentInOutput.erase(MacroCallStructure.back().RedirectParentFrom);
+    MacroCallStructure.pop_back();
+    if (!MacroCallStructure.empty()) {
+      // We might have overwritten the previous state's parent.
+      TokenToParentInOutput[MacroCallStructure.back().RedirectParentFrom] =
+          MacroCallStructure.back().RedirectParentTo;
+    }
+    return false;
+  }
+  if (!Token->MacroCtx && Token->is(tok::l_paren)) {
+    MacroCallStructure.push_back(
+        {currentLine(), parentLine().Tokens.back()->Tok, Token});
+    TokenToParentInOutput[MacroCallStructure.back().RedirectParentFrom] = Token;
+    pushToken(Token);
+    prepareParent(Token, /*First=*/true);
+    Token->MacroParent = true;
+    return false;
+  }
+  // Note that any tokens that are tagged with MR_None have been passed as
+  // arguments to the macro that have not been expanded, for example:
+  // Given: ID(X) x
+  // When calling: ID(a, b)
+  // 'b' will be part of the unexpanded token stream, but tagged MR_None.
+  // Given that erroring out in this case would be disruptive, we continue
+  // pushing the (unformatted) token.
+  // FIXME: This can lead to unfortunate formatting decisions - give the user
+  // a hint that their macro definition is broken.
+  pushToken(Token);
+  return false;
+}
+
+void MacroUnexpander::finalize() {
+  if (State == Finalized)
+    return;
+  assert(State == InProgress && Unexpanded.empty());
+  State = Finalized;
+
+  // We created corresponding unwrapped lines for each incoming line as children
+  // the the toplevel null token.
+  assert(Output.Tokens.size() == 1 && !Output.Tokens.front()->Children.empty());
+
+  // The first line becomes the top level line in the resuling unwrapped line.
+  auto *I = Output.Tokens.front()->Children.begin();
+  ++I;
+  LLVM_DEBUG({
+    llvm::dbgs() << "Finalizing unexpanded lines:\n";
+    debug(Output, 0);
+  });
+  for (auto *E = Output.Tokens.front()->Children.end(); I != E; ++I) {
+    if ((*I)->Tokens.empty())
+      continue;
+
+    // Every subsequent line will become a child of the last token in the
+    // previous line, which is the token prior to the first token in the line.
+    auto L = TokenToPrevious.find((*I)->Tokens.front()->Tok);
+    assert(L != TokenToPrevious.end());
+    assert(L->second->Children.empty());
+    L->second->Children.push_back(std::move(*I));
+
+    // Mark the previous line's last token as generated by a macro expansion
+    // so the formatting algorithm can take that into account.
+    L->second->Tok->MacroParent = true;
+  }
+  Output.Tokens.front()->Children.resize(1);
+}
+
+void MacroUnexpander::pushToken(FormatToken *Token, Line *L) {
+  L = L ? L : currentLine();
+  LLVM_DEBUG(llvm::dbgs() << "-> " << Token->TokenText << "\n");
+  L->Tokens.push_back(std::make_unique<LineNode>(Token));
+  if (PreviousNode != nullptr) {
+    assert(TokenToPrevious.find(Token) == TokenToPrevious.end());
+    TokenToPrevious[Token] = PreviousNode;
+  }
+  PreviousNode = &*L->Tokens.back();
+}
+
+UnwrappedLine MacroUnexpander::createUnwrappedLine(const Line &Line,
+                                                   int Level) {
+  UnwrappedLine Result;
+  Result.Level = Level;
+  for (const auto &N : Line.Tokens) {
+    Result.Tokens.push_back(N->Tok);
+    UnwrappedLineNode &Current = Result.Tokens.back();
+    for (const auto &Child : N->Children) {
+      if (Child->Tokens.empty())
+        continue;
+      Current.Children.push_back(createUnwrappedLine(*Child, Level + 1));
+    }
+    if (Current.Children.size() == 1 &&
+        Current.Tok->isOneOf(tok::l_paren, tok::comma)) {
+      Result.Tokens.splice(Result.Tokens.end(),
+                           Current.Children.front().Tokens);
+      Current.Children.clear();
+    }
+  }
+  return Result;
+}
+
+void MacroUnexpander::debug(const Line &Line, int Level) {
+  for (int i = 0; i < Level; ++i)
+    llvm::dbgs() << " ";
+  for (const auto &N : Line.Tokens) {
+    if (!N)
+      continue;
+    if (N->Tok)
+      llvm::dbgs() << N->Tok->TokenText << " ";
+    for (const auto &Child : N->Children) {
+      llvm::dbgs() << "\n";
+      debug(*Child, Level + 1);
+      for (int i = 0; i < Level; ++i)
+        llvm::dbgs() << " ";
+    }
+  }
+  llvm::dbgs() << "\n";
+}
+
+MacroUnexpander::Line &MacroUnexpander::parentLine() {
+  return **std::prev(std::prev(Lines.end()));
+}
+
+MacroUnexpander::Line *MacroUnexpander::currentLine() { return Lines.back(); }
+
+} // namespace format
+} // namespace clang
\ No newline at end of file
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -446,6 +446,10 @@
   // in a configured macro expansion.
   llvm::Optional<MacroExpansion> MacroCtx;
 
+  /// When macro expansion introduces parents, those are marked as
+  /// \c MacroParent, so formatting knows their children need to be formatted.
+  bool MacroParent = false;
+
   bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
   bool is(TokenType TT) const { return getType() == TT; }
   bool is(const IdentifierInfo *II) const {
Index: clang/lib/Format/CMakeLists.txt
===================================================================
--- clang/lib/Format/CMakeLists.txt
+++ clang/lib/Format/CMakeLists.txt
@@ -8,6 +8,7 @@
   FormatToken.cpp
   FormatTokenLexer.cpp
   MacroExpander.cpp
+  MacroUnexpander.cpp
   NamespaceEndCommentsFixer.cpp
   SortJavaScriptImports.cpp
   TokenAnalyzer.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D88299: [clang-forma... Manuel Klimek via Phabricator via cfe-commits

Reply via email to