ymandel created this revision.
ymandel added a reviewer: ilya-biryukov.
Herald added a subscriber: mgorny.
Herald added a project: clang.

NOTE: This is a preliminary revision for discussion; tests have not yet been 
provided.

The RangeSelector library defines a combinator language for specifying source
ranges based on the bound ids of AST nodes.  The combinator approach follows the
design of the AST matchers.  The RangeSelectors defined here will be used both
in RewriteRule, for specifying the source affected by an edit, and in Stencil,
for specifying source to use constructively in a replacement.

This revision extends the SourceCode library with utility functions needed by
RangeSelector.  Some of them are copied from clang-tidy/utils/LexUtils,
since clang/Tooling can't depend on clang-tidy libraries.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D61774

Files:
  clang/include/clang/Tooling/Refactoring/RangeSelector.h
  clang/include/clang/Tooling/Refactoring/SourceCode.h
  clang/lib/Tooling/Refactoring/CMakeLists.txt
  clang/lib/Tooling/Refactoring/RangeSelector.cpp
  clang/lib/Tooling/Refactoring/SourceCode.cpp

Index: clang/lib/Tooling/Refactoring/SourceCode.cpp
===================================================================
--- clang/lib/Tooling/Refactoring/SourceCode.cpp
+++ clang/lib/Tooling/Refactoring/SourceCode.cpp
@@ -14,18 +14,58 @@
 
 using namespace clang;
 
-StringRef clang::tooling::getText(CharSourceRange Range,
-                                  const ASTContext &Context) {
+StringRef tooling::getText(CharSourceRange Range, const ASTContext &Context) {
   return Lexer::getSourceText(Range, Context.getSourceManager(),
                               Context.getLangOpts());
 }
 
-CharSourceRange clang::tooling::maybeExtendRange(CharSourceRange Range,
-                                                 tok::TokenKind Next,
-                                                 ASTContext &Context) {
+CharSourceRange tooling::maybeExtendRange(CharSourceRange Range,
+                                          tok::TokenKind Next,
+                                          ASTContext &Context) {
   Optional<Token> Tok = Lexer::findNextToken(
       Range.getEnd(), Context.getSourceManager(), Context.getLangOpts());
   if (!Tok || !Tok->is(Next))
     return Range;
   return CharSourceRange::getTokenRange(Range.getBegin(), Tok->getLocation());
 }
+
+// Asks `Lexer::GetBeginningOfToken` for the token start at the character just
+// before `Start`.  Returns an invalid location if `Start`, or the location one
+// character before it, is invalid or a macro location.
+SourceLocation tooling::findPreviousTokenStart(SourceLocation Start,
+                                               const SourceManager &SM,
+                                               const LangOptions &LangOpts) {
+  if (Start.isInvalid() || Start.isMacroID())
+    return SourceLocation();
+
+  // Step back a single character so that the query lands before `Start`.
+  SourceLocation BeforeStart = Start.getLocWithOffset(-1);
+  if (BeforeStart.isInvalid() || BeforeStart.isMacroID())
+    return SourceLocation();
+
+  return Lexer::GetBeginningOfToken(BeforeStart, SM, LangOpts);
+}
+
+// Searches backwards from `Start`, one `findPreviousTokenStart` step at a time,
+// for a token of kind `TK` and returns that token's location.  Returns an
+// invalid location if a step yields an invalid or macro location, or if
+// `Lexer::getRawToken` returns true for a candidate position.
+SourceLocation tooling::findPreviousTokenKind(SourceLocation Start,
+                                              const SourceManager &SM,
+                                              const LangOptions &LangOpts,
+                                              tok::TokenKind TK) {
+  while (true) {
+    SourceLocation L = findPreviousTokenStart(Start, SM, LangOpts);
+    if (L.isInvalid() || L.isMacroID())
+      return SourceLocation();
+
+    // NOTE(review): a true result is treated as "could not lex a token here" --
+    // confirm against the `Lexer::getRawToken` documentation.
+    Token T;
+    if (Lexer::getRawToken(L, T, SM, LangOpts, /*IgnoreWhiteSpace=*/true))
+      return SourceLocation();
+
+    if (T.is(TK))
+      return T.getLocation();
+
+    // Not the kind we want: continue the search from this position.
+    Start = L;
+  }
+}
+
+// Locates the `(` of the call `E` by searching backwards for an `l_paren`
+// token, starting at the first argument if there is one and at the closing
+// parenthesis otherwise.  Returns whatever `findPreviousTokenKind` yields, which
+// is an invalid location on failure.
+SourceLocation tooling::findOpenParen(const CallExpr &E,
+                                      const SourceManager &SM,
+                                      const LangOptions &LangOpts) {
+  SourceLocation SearchFrom;
+  if (E.getNumArgs() == 0)
+    SearchFrom = E.getRParenLoc();
+  else
+    SearchFrom = E.getArg(0)->getBeginLoc();
+  return findPreviousTokenKind(SearchFrom, SM, LangOpts,
+                               tok::TokenKind::l_paren);
+}
Index: clang/lib/Tooling/Refactoring/RangeSelector.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Refactoring/RangeSelector.cpp
@@ -0,0 +1,219 @@
+//===--- RangeSelector.cpp - RangeSelector implementation -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Refactoring/RangeSelector.h"
+#include "clang/AST/Expr.h"
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "clang/Tooling/Refactoring/SourceCode.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Errc.h"
+#include "llvm/Support/Error.h"
+#include <string>
+#include <utility>
+#include <vector>
+
+using namespace clang;
+using namespace tooling;
+
+using ast_matchers::MatchFinder;
+using ast_type_traits::ASTNodeKind;
+using ast_type_traits::DynTypedNode;
+using llvm::Error;
+using llvm::StringError;
+
+using MatchResult = MatchFinder::MatchResult;
+
+// Wraps `Message` in a `StringError` that carries the `invalid_argument` error
+// code.  All failures reported by this file go through this helper.
+static Error invalidArgumentError(Twine Message) {
+  return llvm::make_error<StringError>(llvm::errc::invalid_argument, Message);
+}
+
+// Builds the error for a node bound to `Id` whose kind, `Kind`, is not one the
+// caller can handle.  The message names the id and the actual kind only.
+static Error typeError(StringRef Id, const ASTNodeKind &Kind) {
+  return invalidArgumentError("Mismatched type (node id=" + Id +
+                              " kind=" + Kind.asStringRef() + ")");
+}
+
+static Error typeError(StringRef Id, const ASTNodeKind &Kind, Twine ExpectedType) {
+  return invalidArgumentError("Expected one of " + ExpectedType + " (node id=" + Id +
+                              " kind=" + Kind.asStringRef() + ")");
+}
+
+// Builds the error for a node bound to `Id` that has an acceptable kind but
+// lacks `Property`, which the operation named by `Description` requires.
+static Error missingPropertyError(StringRef Id, Twine Description,
+                                  StringRef Property) {
+  return invalidArgumentError(Description + " requires property '" + Property +
+                              "' (node id=" + Id + ")");
+}
+
+// Looks up the node bound to `Id` in `Nodes`.  Fails with an invalid-argument
+// error if no node is bound to that id.
+static Expected<DynTypedNode> getNode(const ast_matchers::BoundNodes &Nodes,
+                                      StringRef Id) {
+  const auto &Bindings = Nodes.getMap();
+  auto Found = Bindings.find(Id);
+  if (Found != Bindings.end())
+    return Found->second;
+  return invalidArgumentError("Id not bound: " + Id);
+}
+
+// Selects the source range of the node bound to `Id`.  Fails if `Id` is not
+// bound.
+RangeSelector range_selector::node(StringRef Id) {
+  // Own the id: the selector may be stored and run after the caller's string
+  // is gone, so capturing the `StringRef` itself could dangle.
+  std::string OwnedId = Id.str();
+  return [OwnedId](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> Node = getNode(Result.Nodes, OwnedId);
+    if (!Node)
+      return Node.takeError();
+    // A statement that is not an expression goes through `getExtendedRange`
+    // with `semi`; every other node gets exactly its own token range.
+    return Node->get<Stmt>() != nullptr && Node->get<Expr>() == nullptr
+               ? getExtendedRange(*Node, tok::TokenKind::semi, *Result.Context)
+               : CharSourceRange::getTokenRange(Node->getSourceRange());
+  };
+}
+
+// Selects the range of the node bound to `Id`, always passing it through
+// `getExtendedRange` with `semi` (i.e. treating the node as a statement, even
+// if it is an expression).  Fails if `Id` is not bound.
+RangeSelector range_selector::sNode(StringRef Id) {
+  // Own the id: the selector may be stored and run after the caller's string
+  // is gone, so capturing the `StringRef` itself could dangle.
+  std::string OwnedId = Id.str();
+  return [OwnedId](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> Node = getNode(Result.Nodes, OwnedId);
+    if (!Node)
+      return Node.takeError();
+    return getExtendedRange(*Node, tok::TokenKind::semi, *Result.Context);
+  };
+}
+
+// Combines two selectors into one that spans from the beginning of `Begin`'s
+// range to the end of `End`'s range.  Fails if either selector fails or if the
+// resulting end point precedes the begin point.
+RangeSelector range_selector::range(RangeSelector Begin, RangeSelector End) {
+  return [Begin, End](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> First = Begin(Result);
+    if (!First)
+      return First.takeError();
+    Expected<CharSourceRange> Last = End(Result);
+    if (!Last)
+      return Last.takeError();
+    SourceLocation From = First->getBegin();
+    SourceLocation To = Last->getEnd();
+    // Note: we are precluding the possibility of sub-token ranges in the case
+    // that the end range is a token range.
+    if (Result.SourceManager->isBeforeInTranslationUnit(To, From))
+      return invalidArgumentError("Bad range: out of order");
+    // The combined range is a token range exactly when the end range is one.
+    return CharSourceRange(SourceRange(From, To), Last->isTokenRange());
+  };
+}
+
+// Shorthand for `range(node(BeginId), node(EndId))`: spans from the start of
+// the node bound to `BeginId` to the end of the node bound to `EndId`.
+RangeSelector range_selector::nodeRange(StringRef BeginId, StringRef EndId) {
+  return range_selector::range(node(BeginId), node(EndId));
+}
+
+// Selects the member-name token(s) of the `MemberExpr` bound to `Id`.  Fails if
+// `Id` is not bound or is bound to a node that is not a `MemberExpr`.
+RangeSelector range_selector::member(StringRef Id) {
+  // Own the id: the selector may be stored and run after the caller's string
+  // is gone, so capturing the `StringRef` itself could dangle.
+  std::string OwnedId = Id.str();
+  return [OwnedId](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> Node = getNode(Result.Nodes, OwnedId);
+    if (!Node)
+      return Node.takeError();
+    if (auto *M = Node->get<clang::MemberExpr>())
+      return CharSourceRange::getTokenRange(
+          M->getMemberNameInfo().getSourceRange());
+    return typeError(OwnedId, Node->getNodeKind(), "MemberExpr");
+  };
+}
+
+// Selects the name token of the node bound to `Id`, which must be a
+// `NamedDecl`, a `DeclRefExpr` or a `CXXCtorInitializer`.  Fails if `Id` is not
+// bound, if the node has another kind, if the name is not a plain identifier,
+// or if the initializer is not an explicitly written member initializer.
+RangeSelector range_selector::name(StringRef Id) {
+  // Own the id: the selector may be stored and run after the caller's string
+  // is gone, so capturing the `StringRef` itself could dangle.
+  std::string OwnedId = Id.str();
+  return [OwnedId](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<DynTypedNode> N = getNode(Result.Nodes, OwnedId);
+    if (!N)
+      return N.takeError();
+    auto &Node = *N;
+    if (const auto *D = Node.get<NamedDecl>()) {
+      if (!D->getDeclName().isIdentifier())
+        return missingPropertyError(OwnedId, "name", "identifier");
+      SourceLocation L = D->getLocation();
+      auto R = CharSourceRange::getTokenRange(L, L);
+      // Verify that the range covers exactly the name; if it does not, an
+      // invalid (default-constructed) range is returned rather than an error.
+      // FIXME: extend this code to support cases like `operator +` or
+      // `foo<int>` for which this range will be too short.  Doing so will
+      // require subcasing `NamedDecl`, because it doesn't provide virtual
+      // access to the \c DeclarationNameInfo.
+      if (getText(R, *Result.Context) != D->getName())
+        return CharSourceRange();
+      return R;
+    }
+    if (const auto *E = Node.get<DeclRefExpr>()) {
+      if (!E->getNameInfo().getName().isIdentifier())
+        return missingPropertyError(OwnedId, "name", "identifier");
+      SourceLocation L = E->getLocation();
+      return CharSourceRange::getTokenRange(L, L);
+    }
+    if (const auto *I = Node.get<CXXCtorInitializer>()) {
+      // The error text asks for an *explicit member* initializer, so reject
+      // every initializer that is not both a member initializer and written.
+      if (!(I->isMemberInitializer() && I->isWritten()))
+        return missingPropertyError(OwnedId, "name",
+                                    "explicit member initializer");
+      SourceLocation L = I->getMemberLocation();
+      return CharSourceRange::getTokenRange(L, L);
+    }
+    return typeError(OwnedId, Node.getNodeKind(),
+                     "DeclRefExpr, NamedDecl, CXXCtorInitializer");
+  };
+}
+
+namespace {
+// Creates a selector from a range-selection function `Func`, which selects a
+// range that is relative to a bound node id.  `T` is the node type expected
+// by `Func`.  Invoking the selector fails if the id is not bound or if the
+// bound node is not a `T`; otherwise it returns whatever `Func` returns.
+template <typename T, CharSourceRange (*Func)(const MatchResult &, const T &)>
+class RelativeSelector {
+  // Owned copy of the id, so the selector does not depend on the lifetime of
+  // the string it was constructed from.
+  std::string Id;
+
+public:
+  RelativeSelector(StringRef Id) : Id(Id) {}
+
+  Expected<CharSourceRange> operator()(const MatchResult &Result) {
+    Expected<DynTypedNode> N = getNode(Result.Nodes, Id);
+    if (!N)
+      return N.takeError();
+    if (const auto *Arg = N->get<T>())
+      return Func(Result, *Arg);
+    return typeError(Id, N->getNodeKind());
+  }
+};
+} // namespace
+
+// Returns the range of the statements of `CS`: all source between the braces,
+// starting one character after the left-brace location and ending at the
+// right-brace location.
+static CharSourceRange getStatementsRange(const MatchResult &,
+                                          const CompoundStmt &CS) {
+  SourceLocation AfterLBrace = CS.getLBracLoc().getLocWithOffset(1);
+  return CharSourceRange::getCharRange(AfterLBrace, CS.getRBracLoc());
+}
+
+// Selects all source between the braces of the `CompoundStmt` bound to `Id`.
+// Fails if `Id` is not bound or is bound to a node of another kind.
+RangeSelector range_selector::statements(StringRef Id) {
+  return RelativeSelector<CompoundStmt, getStatementsRange>(Id);
+}
+
+// Returns the range of the source between the call's parentheses, or an
+// invalid (default-constructed) range if the opening parenthesis cannot be
+// located.
+static CharSourceRange getArgumentsRange(const MatchResult &Result,
+                                         const CallExpr &CE) {
+  SourceLocation OpenParen = findOpenParen(CE, *Result.SourceManager,
+                                           Result.Context->getLangOpts());
+  // `findOpenParen` reports failure with an invalid location; offsetting that
+  // would fabricate a location that looks valid but points nowhere useful.
+  if (OpenParen.isInvalid())
+    return CharSourceRange();
+  return CharSourceRange::getCharRange(OpenParen.getLocWithOffset(1),
+                                       CE.getRParenLoc());
+}
+
+// Selects all source between the parentheses of the `CallExpr` bound to `Id`.
+// Fails if `Id` is not bound or is bound to a node of another kind.
+RangeSelector range_selector::args(StringRef Id) {
+  return RelativeSelector<CallExpr, getArgumentsRange>(Id);
+}
+
+// Returns the range of the elements of the initializer list `E`: all source
+// between the braces, starting one character after the left-brace location and
+// ending at the right-brace location.
+static CharSourceRange getElementsRange(const MatchResult &,
+                                        const InitListExpr &E) {
+  SourceLocation AfterLBrace = E.getLBraceLoc().getLocWithOffset(1);
+  return CharSourceRange::getCharRange(AfterLBrace, E.getRBraceLoc());
+}
+
+// Selects all source between the braces of the `InitListExpr` bound to `Id`.
+// Fails if `Id` is not bound or is bound to a node of another kind.
+RangeSelector range_selector::elements(StringRef Id) {
+  return RelativeSelector<InitListExpr, getElementsRange>(Id);
+}
+
+// Maps the range selected by `S` through `SourceManager::getExpansionRange`.
+// Fails if `S` fails.  The definition is qualified with `range_selector::` so
+// that it defines the function declared in RangeSelector.h rather than an
+// unrelated function in the global namespace.
+RangeSelector range_selector::contraction(RangeSelector S) {
+  return [S](const MatchResult &Result) -> Expected<CharSourceRange> {
+    Expected<CharSourceRange> SRange = S(Result);
+    if (!SRange)
+      return SRange.takeError();
+    return Result.SourceManager->getExpansionRange(*SRange);
+  };
+}
Index: clang/lib/Tooling/Refactoring/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/Refactoring/CMakeLists.txt
+++ clang/lib/Tooling/Refactoring/CMakeLists.txt
@@ -6,6 +6,7 @@
   AtomicChange.cpp
   Extract/Extract.cpp
   Extract/SourceExtraction.cpp
+  RangeSelector.cpp
   RefactoringActions.cpp
   Rename/RenamingAction.cpp
   Rename/SymbolOccurrences.cpp
Index: clang/include/clang/Tooling/Refactoring/SourceCode.h
===================================================================
--- clang/include/clang/Tooling/Refactoring/SourceCode.h
+++ clang/include/clang/Tooling/Refactoring/SourceCode.h
@@ -72,6 +72,18 @@
                           ASTContext &Context) {
   return getText(getExtendedRange(Node, Next, Context), Context);
 }
+
+/// Asks the lexer for the beginning of the token at the character immediately
+/// before \p Start.
+/// \returns that location, or an invalid location if \p Start, or the location
+/// one character before it, is invalid or a macro location.
+SourceLocation findPreviousTokenStart(SourceLocation Start,
+                                      const SourceManager &SM,
+                                      const LangOptions &LangOpts);
+
+/// Searches backwards from \p Start, token by token, for a token of kind
+/// \p TK.
+/// \returns the location of the token found, or an invalid location if the
+/// search reaches an invalid or macro location or a token cannot be lexed.
+SourceLocation findPreviousTokenKind(SourceLocation Start,
+                                     const SourceManager &SM,
+                                     const LangOptions &LangOpts,
+                                     tok::TokenKind TK);
+
+/// Searches for the opening parenthesis of the call \p E, looking backwards
+/// from the first argument, or from the closing parenthesis if the call has no
+/// arguments.
+/// \returns the location of the parenthesis, or an invalid location if the
+/// search fails.
+SourceLocation findOpenParen(const CallExpr &E, const SourceManager &SM,
+                             const LangOptions &LangOpts);
 } // namespace tooling
 } // namespace clang
 #endif // LLVM_CLANG_TOOLING_REFACTOR_SOURCE_CODE_H
Index: clang/include/clang/Tooling/Refactoring/RangeSelector.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Refactoring/RangeSelector.h
@@ -0,0 +1,79 @@
+//===--- RangeSelector.h - Source-selection library ---------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+///  \file
+///  Defines a combinator library supporting the definition of _selectors_,
+///  which select source ranges based on (bound) AST nodes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_
+#define LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_
+
+#include "clang/ASTMatchers/ASTMatchFinder.h"
+#include "clang/Basic/SourceLocation.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include <functional>
+
+namespace clang {
+namespace tooling {
+using RangeSelector = std::function<Expected<CharSourceRange>(
+    const ast_matchers::MatchFinder::MatchResult &)>;
+
+namespace range_selector {
+/// \returns a selector that always yields the fixed range \p R, ignoring the
+/// match result.
+inline RangeSelector charRange(CharSourceRange R) {
+  return [R](const ast_matchers::MatchFinder::MatchResult &)
+             -> Expected<CharSourceRange> { return R; };
+}
+
+/// \returns the range corresponding to the identified node.
+RangeSelector node(StringRef Id);
+/// Variant of \c node() that identifies the node as a statement, for purposes
+/// of deciding whether to include any trailing semicolon in the selected range.
+/// Only relevant for Expr nodes, which, by default, are *not* considered as
+/// statements.
+/// \returns the range corresponding to the identified node, considered as a
+/// statement.
+RangeSelector sNode(StringRef Id);
+
+/// Convenience version of \c range where end points are nodes.
+RangeSelector nodeRange(StringRef BeginId, StringRef EndId);
+
+/// Given a \c MemberExpr bound to \c Id, selects the member's token.
+RangeSelector member(StringRef Id);
+
+/// Given a \c NamedDecl, \c DeclRefExpr or \c CXXCtorInitializer bound to
+/// \c Id, selects the token of the relevant name, not including qualifiers.
+RangeSelector name(StringRef Id);
+
+/// Given a reference to a call expression node, yields the range of the
+/// arguments (all source between the call's parentheses).
+RangeSelector args(StringRef Id);
+
+/// Given a reference to a compound statement node, yields the range of the
+/// statements (all source between the braces).
+RangeSelector statements(StringRef Id);
+
+/// Given a reference to an initializer-list expression node, yields the range
+/// of the elements (all source between the braces).
+RangeSelector elements(StringRef Id);
+
+/// Yields the range that starts at the start of \p Begin and extends to the end
+/// of \p End.
+RangeSelector range(RangeSelector Begin, RangeSelector End);
+
+/// Yields the range from which `S` was expanded (possibly along with other
+/// source), if `S` is an expansion, and `S` itself, otherwise.  Corresponds to
+/// `SourceManager::getExpansionRange`.
+RangeSelector contraction(RangeSelector S);
+} // namespace range_selector
+} // namespace tooling
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLING_REFACTOR_RANGE_SELECTOR_H_
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to