This revision was automatically updated to reflect the committed changes.
Closed by commit rG1ad15046dcf6: [Syntax] Allow to mutate syntax trees 
(authored by ilya-biryukov).

Changed prior to commit:
  https://reviews.llvm.org/D64573?vs=234494&id=234496#toc

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64573/new/

https://reviews.llvm.org/D64573

Files:
  clang/include/clang/Tooling/Syntax/BuildTree.h
  clang/include/clang/Tooling/Syntax/Mutations.h
  clang/include/clang/Tooling/Syntax/Tokens.h
  clang/include/clang/Tooling/Syntax/Tree.h
  clang/lib/Tooling/Syntax/BuildTree.cpp
  clang/lib/Tooling/Syntax/CMakeLists.txt
  clang/lib/Tooling/Syntax/ComputeReplacements.cpp
  clang/lib/Tooling/Syntax/Mutations.cpp
  clang/lib/Tooling/Syntax/Synthesis.cpp
  clang/lib/Tooling/Syntax/Tokens.cpp
  clang/lib/Tooling/Syntax/Tree.cpp
  clang/unittests/Tooling/Syntax/CMakeLists.txt
  clang/unittests/Tooling/Syntax/TreeTest.cpp

Index: clang/unittests/Tooling/Syntax/TreeTest.cpp
===================================================================
--- clang/unittests/Tooling/Syntax/TreeTest.cpp
+++ clang/unittests/Tooling/Syntax/TreeTest.cpp
@@ -9,14 +9,24 @@
 #include "clang/Tooling/Syntax/Tree.h"
 #include "clang/AST/ASTConsumer.h"
 #include "clang/AST/Decl.h"
+#include "clang/AST/Stmt.h"
+#include "clang/Basic/LLVM.h"
 #include "clang/Frontend/CompilerInstance.h"
+#include "clang/Frontend/CompilerInvocation.h"
 #include "clang/Frontend/FrontendAction.h"
 #include "clang/Lex/PreprocessorOptions.h"
+#include "clang/Tooling/Core/Replacement.h"
 #include "clang/Tooling/Syntax/BuildTree.h"
+#include "clang/Tooling/Syntax/Mutations.h"
 #include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/Tokens.h"
 #include "clang/Tooling/Tooling.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Testing/Support/Annotations.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 #include <cstdlib>
@@ -24,6 +34,15 @@
 using namespace clang;
 
 namespace {
+static llvm::ArrayRef<syntax::Token> tokens(syntax::Node *N) {
+  assert(N->isOriginal() && "tokens of modified nodes are not well-defined");
+  if (auto *L = dyn_cast<syntax::Leaf>(N))
+    return llvm::makeArrayRef(L->token(), 1);
+  auto *T = cast<syntax::Tree>(N);
+  return llvm::makeArrayRef(T->firstLeaf()->token(),
+                            T->lastLeaf()->token() + 1);
+}
+
 class SyntaxTreeTest : public ::testing::Test {
 protected:
   // Build a syntax tree for the code.
@@ -80,13 +99,13 @@
     // Prepare to run a compiler.
     std::vector<const char *> Args = {"syntax-test", "-std=c++11",
                                       "-fsyntax-only", FileName};
-    auto CI = createInvocationFromCommandLine(Args, Diags, FS);
-    assert(CI);
-    CI->getFrontendOpts().DisableFree = false;
-    CI->getPreprocessorOpts().addRemappedFile(
+    Invocation = createInvocationFromCommandLine(Args, Diags, FS);
+    assert(Invocation);
+    Invocation->getFrontendOpts().DisableFree = false;
+    Invocation->getPreprocessorOpts().addRemappedFile(
         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
     CompilerInstance Compiler;
-    Compiler.setInvocation(std::move(CI));
+    Compiler.setInvocation(Invocation);
     Compiler.setDiagnostics(Diags.get());
     Compiler.setFileManager(FileMgr.get());
     Compiler.setSourceManager(SourceMgr.get());
@@ -108,6 +127,27 @@
     }
   }
 
+  /// Finds the deepest node in the tree that covers exactly \p R.
+  /// FIXME: implement this efficiently and move to public syntax tree API.
+  syntax::Node *nodeByRange(llvm::Annotations::Range R, syntax::Node *Root) {
+    llvm::ArrayRef<syntax::Token> Toks = tokens(Root);
+
+    if (Toks.front().location().isFileID() &&
+        Toks.back().location().isFileID() &&
+        syntax::Token::range(*SourceMgr, Toks.front(), Toks.back()) ==
+            syntax::FileRange(SourceMgr->getMainFileID(), R.Begin, R.End))
+      return Root;
+
+    auto *T = dyn_cast<syntax::Tree>(Root);
+    if (!T)
+      return nullptr;
+    for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling()) {
+      if (auto *Result = nodeByRange(R, C))
+        return Result;
+    }
+    return nullptr;
+  }
+
   // Data fields.
   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
@@ -117,6 +157,7 @@
       new FileManager(FileSystemOptions(), FS);
   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
       new SourceManager(*Diags, *FileMgr);
+  std::shared_ptr<CompilerInvocation> Invocation;
   // Set after calling buildTree().
   std::unique_ptr<syntax::Arena> Arena;
 };
@@ -695,6 +736,39 @@
   | `-;
   `-}
        )txt"},
+      // Some nodes are non-modifiable, they are marked with 'I:'.
+      {R"cpp(
+#define HALF_IF if (1+
+#define HALF_IF_2 1) {}
+void test() {
+  HALF_IF HALF_IF_2 else {}
+})cpp",
+       R"txt(
+*: TranslationUnit
+`-SimpleDeclaration
+  |-void
+  |-test
+  |-(
+  |-)
+  `-CompoundStatement
+    |-{
+    |-IfStatement
+    | |-I: if
+    | |-I: (
+    | |-I: UnknownExpression
+    | | |-I: 1
+    | | |-I: +
+    | | `-I: 1
+    | |-I: )
+    | |-I: CompoundStatement
+    | | |-I: {
+    | | `-I: }
+    | |-else
+    | `-CompoundStatement
+    |   |-{
+    |   `-}
+    `-}
+       )txt"},
   };
 
   for (const auto &T : Cases) {
@@ -706,4 +780,45 @@
     EXPECT_EQ(Expected, Actual) << "the resulting dump is:\n" << Actual;
   }
 }
+
+TEST_F(SyntaxTreeTest, Mutations) {
+  using Transformation = std::function<void(
+      const llvm::Annotations & /*Input*/, syntax::TranslationUnit * /*Root*/)>;
+  auto CheckTransformation = [this](std::string Input, std::string Expected,
+                                    Transformation Transform) -> void {
+    llvm::Annotations Source(Input);
+    auto *Root = buildTree(Source.code());
+
+    Transform(Source, Root);
+
+    auto Replacements = syntax::computeReplacements(*Arena, *Root);
+    auto Output = tooling::applyAllReplacements(Source.code(), Replacements);
+    if (!Output) {
+      ADD_FAILURE() << "could not apply replacements: "
+                    << llvm::toString(Output.takeError());
+      return;
+    }
+
+    EXPECT_EQ(Expected, *Output) << "input is:\n" << Input;
+  };
+
+  // Removes the selected statement. Input should have exactly one selected
+  // range and it should correspond to a single statement.
+  auto RemoveStatement = [this](const llvm::Annotations &Input,
+                                syntax::TranslationUnit *TU) {
+    auto *S = cast<syntax::Statement>(nodeByRange(Input.range(), TU));
+    ASSERT_TRUE(S->canModify()) << "cannot remove a statement";
+    syntax::removeStatement(*Arena, S);
+  };
+
+  std::vector<std::pair<std::string /*Input*/, std::string /*Expected*/>>
+      Cases = {
+          {"void test() { [[100+100;]] test(); }", "void test() {  test(); }"},
+          {"void test() { if (true) [[{}]] else {} }",
+           "void test() { if (true) ; else {} }"},
+          {"void test() { [[;]] }", "void test() {  }"}};
+  for (const auto &C : Cases)
+    CheckTransformation(C.first, C.second, RemoveStatement);
+}
+
 } // namespace
Index: clang/unittests/Tooling/Syntax/CMakeLists.txt
===================================================================
--- clang/unittests/Tooling/Syntax/CMakeLists.txt
+++ clang/unittests/Tooling/Syntax/CMakeLists.txt
@@ -15,6 +15,7 @@
   clangLex
   clangSerialization
   clangTooling
+  clangToolingCore
   clangToolingSyntax
   )
 
Index: clang/lib/Tooling/Syntax/Tree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tree.cpp
+++ clang/lib/Tooling/Syntax/Tree.cpp
@@ -40,7 +40,10 @@
 
 syntax::Node::Node(NodeKind Kind)
     : Parent(nullptr), NextSibling(nullptr), Kind(static_cast<unsigned>(Kind)),
-      Role(static_cast<unsigned>(NodeRole::Detached)) {}
+      Role(static_cast<unsigned>(NodeRole::Detached)), Original(false),
+      CanModify(false) {}
+
+bool syntax::Node::isDetached() const { return role() == NodeRole::Detached; }
 
 bool syntax::Tree::classof(const Node *N) { return N->kind() > NodeKind::Leaf; }
 
@@ -56,6 +59,48 @@
   this->FirstChild = Child;
 }
 
+void syntax::Tree::replaceChildRangeLowLevel(Node *BeforeBegin, Node *End,
+                                             Node *New) {
+  assert(!BeforeBegin || BeforeBegin->Parent == this);
+
+#ifndef NDEBUG
+  for (auto *N = New; N; N = N->nextSibling()) {
+    assert(N->Parent == nullptr);
+    assert(N->role() != NodeRole::Detached && "Roles must be set");
+    // FIXME: sanity-check the role.
+  }
+#endif
+
+  // Detach old nodes.
+  for (auto *N = !BeforeBegin ? FirstChild : BeforeBegin->nextSibling();
+       N != End;) {
+    auto *Next = N->NextSibling;
+
+    N->Role = static_cast<unsigned>(NodeRole::Detached);
+    N->Parent = nullptr;
+    N->NextSibling = nullptr;
+
+    N = Next;
+  }
+
+  // Attach new nodes.
+  if (BeforeBegin)
+    BeforeBegin->NextSibling = New ? New : End;
+  else
+    FirstChild = New ? New : End;
+
+  if (New) {
+    auto *Last = New;
+    while (auto *Next = Last->nextSibling())
+      Last = Next;
+    Last->NextSibling = End;
+  }
+
+  // Mark the node as modified.
+  for (auto *T = this; T && T->Original; T = T->Parent)
+    T->Original = false;
+}
+
 namespace {
 static void traverse(const syntax::Node *N,
                      llvm::function_ref<void(const syntax::Node *)> Visit) {
@@ -85,9 +130,15 @@
 
 static void dumpTree(llvm::raw_ostream &OS, const syntax::Node *N,
                      const syntax::Arena &A, std::vector<bool> IndentMask) {
+  std::string Marks;
+  if (!N->isOriginal())
+    Marks += "M";
   if (N->role() == syntax::NodeRole::Detached)
-    OS << "*: ";
-  // FIXME: find a nice way to print other roles.
+    Marks += "*"; // FIXME: find a nice way to print other roles.
+  if (!N->canModify())
+    Marks += "I";
+  if (!Marks.empty())
+    OS << Marks << ": ";
 
   if (auto *L = llvm::dyn_cast<syntax::Leaf>(N)) {
     dumpTokens(OS, *L->token(), A.sourceManager());
@@ -133,10 +184,35 @@
     if (!L)
       return;
     ::dumpTokens(OS, *L->token(), A.sourceManager());
+    OS << " ";
   });
   return OS.str();
 }
 
+syntax::Leaf *syntax::Tree::firstLeaf() {
+  auto *T = this;
+  while (auto *C = T->firstChild()) {
+    if (auto *L = dyn_cast<syntax::Leaf>(C))
+      return L;
+    T = cast<syntax::Tree>(C);
+  }
+  return nullptr;
+}
+
+syntax::Leaf *syntax::Tree::lastLeaf() {
+  auto *T = this;
+  while (auto *C = T->firstChild()) {
+    // Find the last child.
+    while (auto *Next = C->nextSibling())
+      C = Next;
+
+    if (auto *L = dyn_cast<syntax::Leaf>(C))
+      return L;
+    T = cast<syntax::Tree>(C);
+  }
+  return nullptr;
+}
+
 syntax::Node *syntax::Tree::findChild(NodeRole R) {
   for (auto *C = FirstChild; C; C = C->nextSibling()) {
     if (C->role() == R)
Index: clang/lib/Tooling/Syntax/Tokens.cpp
===================================================================
--- clang/lib/Tooling/Syntax/Tokens.cpp
+++ clang/lib/Tooling/Syntax/Tokens.cpp
@@ -67,7 +67,7 @@
   auto F = First.range(SM);
   auto L = Last.range(SM);
   assert(F.file() == L.file() && "tokens from different files");
-  assert(F.endOffset() <= L.beginOffset() && "wrong order of tokens");
+  assert(F == L || F.endOffset() <= L.beginOffset() && "wrong order of tokens");
   return FileRange(F.file(), F.beginOffset(), L.endOffset());
 }
 
@@ -135,6 +135,12 @@
   return {Begin, End};
 }
 
+CharSourceRange FileRange::toCharRange(const SourceManager &SM) const {
+  return CharSourceRange(
+      SourceRange(SM.getComposedLoc(File, Begin), SM.getComposedLoc(File, End)),
+      /*IsTokenRange=*/false);
+}
+
 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
 TokenBuffer::spelledForExpandedToken(const syntax::Token *Expanded) const {
   assert(Expanded);
Index: clang/lib/Tooling/Syntax/Synthesis.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Synthesis.cpp
@@ -0,0 +1,38 @@
+//===- Synthesis.cpp ------------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Syntax/BuildTree.h"
+
+using namespace clang;
+
+/// Exposes private syntax tree APIs required to implement node synthesis.
+/// Should not be used for anything else.
+class syntax::FactoryImpl {
+public:
+  static void prependChildLowLevel(syntax::Tree *T, syntax::Node *Child,
+                                   syntax::NodeRole R) {
+    T->prependChildLowLevel(Child, R);
+  }
+};
+
+clang::syntax::Leaf *syntax::createPunctuation(clang::syntax::Arena &A,
+                                               clang::tok::TokenKind K) {
+  auto Tokens = A.lexBuffer(llvm::MemoryBuffer::getMemBuffer(
+                                clang::tok::getPunctuatorSpelling(K)))
+                    .second;
+  assert(Tokens.size() == 1);
+  assert(Tokens.front().kind() == K);
+  return new (A.allocator()) clang::syntax::Leaf(Tokens.begin());
+}
+
+clang::syntax::EmptyStatement *
+syntax::createEmptyStatement(clang::syntax::Arena &A) {
+  auto *S = new (A.allocator()) clang::syntax::EmptyStatement;
+  FactoryImpl::prependChildLowLevel(S, createPunctuation(A, clang::tok::semi),
+                                    NodeRole::Unknown);
+  return S;
+}
Index: clang/lib/Tooling/Syntax/Mutations.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/Mutations.cpp
@@ -0,0 +1,84 @@
+//===- Mutations.cpp ------------------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Syntax/Mutations.h"
+#include "clang/Basic/LLVM.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Lex/Token.h"
+#include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Syntax/BuildTree.h"
+#include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "clang/Tooling/Syntax/Tree.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Casting.h"
+#include <cassert>
+#include <string>
+
+using namespace clang;
+
+// This class has access to the internals of tree nodes. Its sole purpose is to
+// define helpers that allow implementing the high-level mutation operations.
+class syntax::MutationsImpl {
+public:
+  /// Add a new node with a specified role.
+  static void addAfter(syntax::Node *Anchor, syntax::Node *New, NodeRole Role) {
+    assert(New->Parent == nullptr);
+    assert(New->NextSibling == nullptr);
+    assert(!New->isDetached());
+    assert(Role != NodeRole::Detached);
+
+    New->Role = static_cast<unsigned>(Role);
+    Anchor->parent()->replaceChildRangeLowLevel(Anchor, Anchor, New);
+  }
+
+  /// Replace the node, keeping the role.
+  static void replace(syntax::Node *Old, syntax::Node *New) {
+    assert(New->Parent == nullptr);
+    assert(New->NextSibling == nullptr);
+    assert(New->isDetached());
+
+    New->Role = Old->Role;
+    Old->parent()->replaceChildRangeLowLevel(findPrevious(Old),
+                                             Old->nextSibling(), New);
+  }
+
+  /// Completely remove the node from its parent.
+  static void remove(syntax::Node *N) {
+    N->parent()->replaceChildRangeLowLevel(findPrevious(N), N->nextSibling(),
+                                           /*New=*/nullptr);
+  }
+
+private:
+  static syntax::Node *findPrevious(syntax::Node *N) {
+    if (N->parent()->firstChild() == N)
+      return nullptr;
+    for (syntax::Node *C = N->parent()->firstChild(); C != nullptr;
+         C = C->nextSibling()) {
+      if (C->nextSibling() == N)
+        return C;
+    }
+    llvm_unreachable("could not find a child node");
+  }
+};
+
+void syntax::removeStatement(syntax::Arena &A, syntax::Statement *S) {
+  assert(S->canModify());
+
+  if (auto *Parent = llvm::dyn_cast<CompoundStatement>(S->parent())) {
+    // A child of CompoundStatement can just be safely removed.
+    MutationsImpl::remove(S);
+    return;
+  }
+  // For the rest, we have to replace with an empty statement.
+  if (isa<EmptyStatement>(S))
+    return; // already an empty statement, nothing to do.
+
+  MutationsImpl::replace(S, createEmptyStatement(A));
+}
Index: clang/lib/Tooling/Syntax/ComputeReplacements.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Syntax/ComputeReplacements.cpp
@@ -0,0 +1,126 @@
+//===- ComputeReplacements.cpp --------------------------------*- C++ -*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+#include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Syntax/Mutations.h"
+#include "clang/Tooling/Syntax/Tokens.h"
+#include "llvm/Support/Error.h"
+
+using namespace clang;
+
+namespace {
+using ProcessTokensFn = llvm::function_ref<void(llvm::ArrayRef<syntax::Token>,
+                                                bool /*IsOriginal*/)>;
+/// Enumerates spans of tokens from the tree consecutively laid out in memory.
+void enumerateTokenSpans(const syntax::Tree *Root, ProcessTokensFn Callback) {
+  struct Enumerator {
+    Enumerator(ProcessTokensFn Callback)
+        : SpanBegin(nullptr), SpanEnd(nullptr), SpanIsOriginal(false),
+          Callback(Callback) {}
+
+    void run(const syntax::Tree *Root) {
+      process(Root);
+      // Report the last span to the user.
+      if (SpanBegin)
+        Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal);
+    }
+
+  private:
+    void process(const syntax::Node *N) {
+      if (auto *T = dyn_cast<syntax::Tree>(N)) {
+        for (auto *C = T->firstChild(); C != nullptr; C = C->nextSibling())
+          process(C);
+        return;
+      }
+
+      auto *L = cast<syntax::Leaf>(N);
+      if (SpanEnd == L->token() && SpanIsOriginal == L->isOriginal()) {
+        // Extend the current span.
+        ++SpanEnd;
+        return;
+      }
+      // Report the current span to the user.
+      if (SpanBegin)
+        Callback(llvm::makeArrayRef(SpanBegin, SpanEnd), SpanIsOriginal);
+      // Start recording a new span.
+      SpanBegin = L->token();
+      SpanEnd = SpanBegin + 1;
+      SpanIsOriginal = L->isOriginal();
+    }
+
+    const syntax::Token *SpanBegin;
+    const syntax::Token *SpanEnd;
+    bool SpanIsOriginal;
+    ProcessTokensFn Callback;
+  };
+
+  return Enumerator(Callback).run(Root);
+}
+
+syntax::FileRange rangeOfExpanded(const syntax::Arena &A,
+                                  llvm::ArrayRef<syntax::Token> Expanded) {
+  auto &Buffer = A.tokenBuffer();
+  auto &SM = A.sourceManager();
+
+  // Check that \p Expanded actually points into expanded tokens.
+  assert(Buffer.expandedTokens().begin() <= Expanded.begin());
+  assert(Expanded.end() < Buffer.expandedTokens().end());
+
+  if (Expanded.empty())
+    // (!) empty tokens must always point before end().
+    return syntax::FileRange(
+        SM, SM.getExpansionLoc(Expanded.begin()->location()), /*Length=*/0);
+
+  auto Spelled = Buffer.spelledForExpanded(Expanded);
+  assert(Spelled && "could not find spelled tokens for expanded");
+  return syntax::Token::range(SM, Spelled->front(), Spelled->back());
+}
+} // namespace
+
+tooling::Replacements
+syntax::computeReplacements(const syntax::Arena &A,
+                            const syntax::TranslationUnit &TU) {
+  auto &Buffer = A.tokenBuffer();
+  auto &SM = A.sourceManager();
+
+  tooling::Replacements Replacements;
+  // Text inserted by the replacement we are building now.
+  std::string Replacement;
+  auto emitReplacement = [&](llvm::ArrayRef<syntax::Token> ReplacedRange) {
+    if (ReplacedRange.empty() && Replacement.empty())
+      return;
+    llvm::cantFail(Replacements.add(tooling::Replacement(
+        SM, rangeOfExpanded(A, ReplacedRange).toCharRange(SM), Replacement)));
+    Replacement = "";
+  };
+
+  const syntax::Token *NextOriginal = Buffer.expandedTokens().begin();
+  enumerateTokenSpans(
+      &TU, [&](llvm::ArrayRef<syntax::Token> Tokens, bool IsOriginal) {
+        if (!IsOriginal) {
+          Replacement +=
+              syntax::Token::range(SM, Tokens.front(), Tokens.back()).text(SM);
+          return;
+        }
+        assert(NextOriginal <= Tokens.begin());
+        // We are looking at a span of original tokens.
+        if (NextOriginal != Tokens.begin()) {
+          // There is a gap, record a replacement or deletion.
+          emitReplacement(llvm::makeArrayRef(NextOriginal, Tokens.begin()));
+        } else {
+          // No gap, but we may have pending insertions. Emit them now.
+          emitReplacement(llvm::makeArrayRef(NextOriginal, /*Length=*/0));
+        }
+        NextOriginal = Tokens.end();
+      });
+
+  // We might have pending replacements at the end of file. If so, emit them.
+  emitReplacement(llvm::makeArrayRef(
+      NextOriginal, Buffer.expandedTokens().drop_back().end()));
+
+  return Replacements;
+}
Index: clang/lib/Tooling/Syntax/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/Syntax/CMakeLists.txt
+++ clang/lib/Tooling/Syntax/CMakeLists.txt
@@ -2,7 +2,10 @@
 
 add_clang_library(clangToolingSyntax
   BuildTree.cpp
+  ComputeReplacements.cpp
   Nodes.cpp
+  Mutations.cpp
+  Synthesis.cpp
   Tokens.cpp
   Tree.cpp
 
Index: clang/lib/Tooling/Syntax/BuildTree.cpp
===================================================================
--- clang/lib/Tooling/Syntax/BuildTree.cpp
+++ clang/lib/Tooling/Syntax/BuildTree.cpp
@@ -25,6 +25,7 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
 #include <map>
 
@@ -85,7 +86,7 @@
     assert(Tokens.back().kind() == tok::eof);
 
     // Build the root of the tree, consuming all the children.
-    Pending.foldChildren(Tokens.drop_back(),
+    Pending.foldChildren(Arena, Tokens.drop_back(),
                          new (Arena.allocator()) syntax::TranslationUnit);
 
     return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
@@ -156,9 +157,12 @@
       assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
       // Create all leaf nodes.
       // Note that we do not have 'eof' in the tree.
-      for (auto &T : A.tokenBuffer().expandedTokens().drop_back())
-        Trees.insert(Trees.end(),
-                     {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
+      for (auto &T : A.tokenBuffer().expandedTokens().drop_back()) {
+        auto *L = new (A.allocator()) syntax::Leaf(&T);
+        L->Original = true;
+        L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
+        Trees.insert(Trees.end(), {&T, NodeAndRole{L}});
+      }
     }
 
     ~Forest() { assert(DelayedFolds.empty()); }
@@ -176,18 +180,19 @@
     }
 
     /// Add \p Node to the forest and attach child nodes based on \p Tokens.
-    void foldChildren(llvm::ArrayRef<syntax::Token> Tokens,
+    void foldChildren(const syntax::Arena &A,
+                      llvm::ArrayRef<syntax::Token> Tokens,
                       syntax::Tree *Node) {
       // Execute delayed folds inside `Tokens`.
       auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin());
       auto It = BeginExecuted;
       for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It)
-        foldChildrenEager(llvm::makeArrayRef(It->first, It->second.End),
+        foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End),
                           It->second.Node);
       DelayedFolds.erase(BeginExecuted, It);
 
       // Attach children to `Node`.
-      foldChildrenEager(Tokens, Node);
+      foldChildrenEager(A, Tokens, Node);
     }
 
     /// Schedule a call to `foldChildren` that will only be executed when
@@ -244,7 +249,8 @@
   private:
     /// Implementation detail of `foldChildren`, does acutal folding ignoring
     /// delayed folds.
-    void foldChildrenEager(llvm::ArrayRef<syntax::Token> Tokens,
+    void foldChildrenEager(const syntax::Arena &A,
+                           llvm::ArrayRef<syntax::Token> Tokens,
                            syntax::Tree *Node) {
       assert(Node->firstChild() == nullptr && "node already has children");
 
@@ -263,6 +269,10 @@
         Node->prependChildLowLevel(std::prev(It)->second.Node,
                                    std::prev(It)->second.Role);
 
+      // Mark that this node came from the AST and is backed by the source code.
+      Node->Original = true;
+      Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
+
       Trees.erase(BeginChildren, EndChildren);
       Trees.insert({FirstToken, NodeAndRole(Node)});
     }
@@ -585,7 +595,7 @@
 
 void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
                                    syntax::Tree *New) {
-  Pending.foldChildren(Range, New);
+  Pending.foldChildren(Arena, Range, New);
 }
 
 void syntax::TreeBuilder::noticeDeclaratorRange(
@@ -617,7 +627,8 @@
     Pending.assignRole(getExprRange(E),
                        NodeRole::ExpressionStatement_expression);
     // (!) 'getRange(Stmt)' ensures this already covers a trailing semicolon.
-    Pending.foldChildren(Range, new (allocator()) syntax::ExpressionStatement);
+    Pending.foldChildren(Arena, Range,
+                         new (allocator()) syntax::ExpressionStatement);
   }
   Pending.assignRole(Range, Role);
 }
Index: clang/include/clang/Tooling/Syntax/Tree.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tree.h
+++ clang/include/clang/Tooling/Syntax/Tree.h
@@ -65,6 +65,9 @@
 
 class Tree;
 class TreeBuilder;
+class FactoryImpl;
+class MutationsImpl;
+
 enum class NodeKind : uint16_t;
 enum class NodeRole : uint8_t;
 
@@ -79,6 +82,23 @@
   NodeKind kind() const { return static_cast<NodeKind>(Kind); }
   NodeRole role() const { return static_cast<NodeRole>(Role); }
 
+  /// Whether the node is detached from a tree, i.e. does not have a parent.
+  bool isDetached() const;
+  /// Whether the node was created from the AST backed by the source code
+  /// rather than added later through mutation APIs or created with factory
+  /// functions.
+  /// When this flag is true, all subtrees are also original.
+  /// This flag is set to false on any modifications to the node or any of its
+  /// subtrees, even if this simply involves swapping existing subtrees.
+  bool isOriginal() const { return Original; }
+  /// If this function return false, the tree cannot be modified because there
+  /// is no reasonable way to produce the corresponding textual replacements.
+  /// This can happen when the node crosses macro expansion boundaries.
+  ///
+  /// Note that even if the node is not modifiable, its child nodes can be
+  /// modifiable.
+  bool canModify() const { return CanModify; }
+
   const Tree *parent() const { return Parent; }
   Tree *parent() { return Parent; }
 
@@ -93,11 +113,17 @@
 private:
   // Tree is allowed to change the Parent link and Role.
   friend class Tree;
+  // TreeBuilder is allowed to set the Original and CanModify flags.
+  friend class TreeBuilder;
+  // MutationsImpl sets roles and CanModify flag.
+  friend class MutationsImpl;
 
   Tree *Parent;
   Node *NextSibling;
   unsigned Kind : 16;
   unsigned Role : 8;
+  unsigned Original : 1;
+  unsigned CanModify : 1;
 };
 
 /// A leaf node points to a single token inside the expanded token stream.
@@ -121,6 +147,14 @@
   Node *firstChild() { return FirstChild; }
   const Node *firstChild() const { return FirstChild; }
 
+  Leaf *firstLeaf();
+  const Leaf *firstLeaf() const {
+    return const_cast<Tree *>(this)->firstLeaf();
+  }
+
+  Leaf *lastLeaf();
+  const Leaf *lastLeaf() const { return const_cast<Tree *>(this)->lastLeaf(); }
+
 protected:
   /// Find the first node with a corresponding role.
   syntax::Node *findChild(NodeRole R);
@@ -128,10 +162,18 @@
 private:
   /// Prepend \p Child to the list of children and and sets the parent pointer.
   /// A very low-level operation that does not check any invariants, only used
-  /// by TreeBuilder.
+  /// by TreeBuilder and FactoryImpl.
   /// EXPECTS: Role != NodeRoleDetached.
   void prependChildLowLevel(Node *Child, NodeRole Role);
   friend class TreeBuilder;
+  friend class FactoryImpl;
+
+  /// Replace a range of children [BeforeBegin->NextSibling, End) with a list of
+  /// new nodes starting at \p New.
+  /// Only used by MutationsImpl to implement higher-level mutation operations.
+  /// (!) \p New can be null to model removal of the child range.
+  void replaceChildRangeLowLevel(Node *BeforeBegin, Node *End, Node *New);
+  friend class MutationsImpl;
 
   Node *FirstChild = nullptr;
 };
Index: clang/include/clang/Tooling/Syntax/Tokens.h
===================================================================
--- clang/include/clang/Tooling/Syntax/Tokens.h
+++ clang/include/clang/Tooling/Syntax/Tokens.h
@@ -78,6 +78,10 @@
   /// Gets the substring that this FileRange refers to.
   llvm::StringRef text(const SourceManager &SM) const;
 
+  /// Convert to the clang range. The returned range is always a char range,
+  /// never a token range.
+  CharSourceRange toCharRange(const SourceManager &SM) const;
+
   friend bool operator==(const FileRange &L, const FileRange &R) {
     return std::tie(L.File, L.Begin, L.End) == std::tie(R.File, R.Begin, R.End);
   }
Index: clang/include/clang/Tooling/Syntax/Mutations.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Syntax/Mutations.h
@@ -0,0 +1,37 @@
+//===- Mutations.h - mutate syntax trees --------------------*- C++ ---*-=====//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// Defines high-level APIs for transforming syntax trees and producing the
+// corresponding textual replacements.
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H
+#define LLVM_CLANG_TOOLING_SYNTAX_MUTATIONS_H
+
+#include "clang/Tooling/Core/Replacement.h"
+#include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/Tree.h"
+
+namespace clang {
+namespace syntax {
+
+/// Computes textual replacements required to mimic the tree modifications made
+/// to the syntax tree.
+tooling::Replacements computeReplacements(const Arena &A,
+                                          const syntax::TranslationUnit &TU);
+
+/// Removes a statement or replaces it with an empty statement where one is
+/// required syntactically. E.g., in the following example:
+///     if (cond) { foo(); } else bar();
+/// One can remove `foo();` completely and to remove `bar();` we would need to
+/// replace it with an empty statement.
+/// EXPECTS: S->canModify() == true
+void removeStatement(syntax::Arena &A, syntax::Statement *S);
+
+} // namespace syntax
+} // namespace clang
+
+#endif
Index: clang/include/clang/Tooling/Syntax/BuildTree.h
===================================================================
--- clang/include/clang/Tooling/Syntax/BuildTree.h
+++ clang/include/clang/Tooling/Syntax/BuildTree.h
@@ -11,7 +11,9 @@
 #define LLVM_CLANG_TOOLING_SYNTAX_TREE_H
 
 #include "clang/AST/Decl.h"
+#include "clang/Basic/TokenKinds.h"
 #include "clang/Tooling/Syntax/Nodes.h"
+#include "clang/Tooling/Syntax/Tree.h"
 
 namespace clang {
 namespace syntax {
@@ -19,6 +21,13 @@
 /// Build a syntax tree for the main file.
 syntax::TranslationUnit *buildSyntaxTree(Arena &A,
                                          const clang::TranslationUnitDecl &TU);
+
+// Create syntax trees from subtrees not backed by the source code.
+
+clang::syntax::Leaf *createPunctuation(clang::syntax::Arena &A,
+                                       clang::tok::TokenKind K);
+clang::syntax::EmptyStatement *createEmptyStatement(clang::syntax::Arena &A);
+
 } // namespace syntax
 } // namespace clang
 #endif
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to