sammccall created this revision.
sammccall added a reviewer: SureYeaah.
Herald added subscribers: cfe-commits, kadircet, arphaman, jkorous, MaskRay, 
ilya-biryukov.
Herald added a project: clang.

These aren't formally subexpressions in C++, in this case + is left-associative.
However informally +, *, etc are usually (mathematically) associative and users
consider these subexpressions.

We detect these and in simple cases support extracting the partial expression.
As well as builtin associative operators, we assume that overloads of them
are associative and support those too.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D65139

Files:
  clang-tools-extra/clangd/Selection.cpp
  clang-tools-extra/clangd/Selection.h
  clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
  clang-tools-extra/clangd/unittests/SelectionTests.cpp
  clang-tools-extra/clangd/unittests/TweakTests.cpp

Index: clang-tools-extra/clangd/unittests/TweakTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/TweakTests.cpp
+++ clang-tools-extra/clangd/unittests/TweakTests.cpp
@@ -444,6 +444,47 @@
           // FIXME: Doesn't work correctly for \[\[clang::uninitialized\]\] int
           // b = [[1]]; since the attr is inside the DeclStmt and the bounds of
           // DeclStmt don't cover the attribute
+
+          // Binary subexpressions
+          {R"cpp(void f() {
+                   int x = 1 + [[2 + 3 + 4]] + 5;
+                 })cpp",
+           R"cpp(void f() {
+                   auto dummy = 2 + 3 + 4; int x = 1 + dummy + 5;
+                 })cpp"},
+          // Non-associative operations have no special support
+          {R"cpp(void f() {
+                   int x = 1 - [[2 - 3 - 4]] - 5;
+                 })cpp",
+           R"cpp(void f() {
+                   auto dummy = 1 - 2 - 3 - 4; int x = dummy - 5;
+                 })cpp"},
+          // A mix of associative operators isn't associative.
+          {R"cpp(void f() {
+                   int x = 1 * [[2 + 3 * 4]] + 5;
+                 })cpp",
+           R"cpp(void f() {
+                   auto dummy = 1 * 2 + 3 * 4; int x = dummy + 5;
+                 })cpp"},
+          // Overloaded operators are supported, we assume associativity
+          // as if they were built-in.
+          {R"cpp(struct S {
+                   S(int);
+                 };
+                 S operator+(S, S);
+
+                 void f() {
+                   S x = S(1) + [[S(2) + S(3) + S(4)]] + S(5);
+                 })cpp",
+           R"cpp(struct S {
+                   S(int);
+                 };
+                 S operator+(S, S);
+
+                 void f() {
+                   auto dummy = S(2) + S(3) + S(4); S x = S(1) + dummy + S(5);
+                 })cpp"},
+
       };
   for (const auto &IO : InputOutputs) {
     checkTransform(ID, IO.first, IO.second);
Index: clang-tools-extra/clangd/unittests/SelectionTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -335,6 +335,23 @@
   }
 }
 
+TEST(SelectionTest, Implicit) {
+  const char* Test = R"cpp(
+    struct S { S(const char*); };
+    int f(S);
+    int x = f("^");
+  )cpp";
+  auto AST = TestTU::withCode(Annotations(Test).code()).build();
+  auto T = makeSelectionTree(Test, AST);
+
+  const SelectionTree::Node *Str = T.commonAncestor();
+  EXPECT_EQ("StringLiteral", nodeKind(Str)) << "Implicit selected?";
+  EXPECT_EQ("ImplicitCastExpr", nodeKind(Str->Parent));
+  EXPECT_EQ("CXXConstructExpr", nodeKind(Str->Parent->Parent));
+  EXPECT_EQ(Str, &Str->Parent->Parent->ignoreImplicit())
+      << "Didn't unwrap " << nodeKind(&Str->Parent->Parent->ignoreImplicit());
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
===================================================================
--- clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
+++ clang-tools-extra/clangd/refactor/tweaks/ExtractVariable.cpp
@@ -13,7 +13,9 @@
 #include "refactor/Tweak.h"
 #include "clang/AST/ASTContext.h"
 #include "clang/AST/Expr.h"
+#include "clang/AST/ExprCXX.h"
 #include "clang/AST/OperationKinds.h"
+#include "clang/AST/PrettyPrinter.h"
 #include "clang/AST/RecursiveASTVisitor.h"
 #include "clang/AST/Stmt.h"
 #include "clang/AST/StmtCXX.h"
@@ -26,6 +28,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
 
 namespace clang {
 namespace clangd {
@@ -38,10 +41,14 @@
   const clang::Expr *getExpr() const { return Expr; }
   const SelectionTree::Node *getExprNode() const { return ExprNode; }
   bool isExtractable() const { return Extractable; }
+  // The half-open range for the expression to be extracted.
+  SourceRange getExtractionChars() const;
   // Generate Replacement for replacing selected expression with given VarName
-  tooling::Replacement replaceWithVar(llvm::StringRef VarName) const;
+  tooling::Replacement replaceWithVar(SourceRange Chars,
+                                      llvm::StringRef VarName) const;
   // Generate Replacement for declaring the selected Expr as a new variable
-  tooling::Replacement insertDeclaration(llvm::StringRef VarName) const;
+  tooling::Replacement insertDeclaration(llvm::StringRef VarName,
+                                         SourceRange InitChars) const;
 
 private:
   bool Extractable = false;
@@ -166,23 +173,20 @@
   }
   return nullptr;
 }
+
 // returns the replacement for substituting the extraction with VarName
 tooling::Replacement
-ExtractionContext::replaceWithVar(llvm::StringRef VarName) const {
-  const llvm::Optional<SourceRange> ExtractionRng =
-      toHalfOpenFileRange(SM, Ctx.getLangOpts(), getExpr()->getSourceRange());
-  unsigned ExtractionLength = SM.getFileOffset(ExtractionRng->getEnd()) -
-                              SM.getFileOffset(ExtractionRng->getBegin());
-  return tooling::Replacement(SM, ExtractionRng->getBegin(), ExtractionLength,
-                              VarName);
+ExtractionContext::replaceWithVar(SourceRange Chars,
+                                  llvm::StringRef VarName) const {
+  unsigned ExtractionLength =
+      SM.getFileOffset(Chars.getEnd()) - SM.getFileOffset(Chars.getBegin());
+  return tooling::Replacement(SM, Chars.getBegin(), ExtractionLength, VarName);
 }
 // returns the Replacement for declaring a new variable storing the extraction
 tooling::Replacement
-ExtractionContext::insertDeclaration(llvm::StringRef VarName) const {
-  const llvm::Optional<SourceRange> ExtractionRng =
-      toHalfOpenFileRange(SM, Ctx.getLangOpts(), getExpr()->getSourceRange());
-  assert(ExtractionRng && "ExtractionRng should not be null");
-  llvm::StringRef ExtractionCode = toSourceCode(SM, *ExtractionRng);
+ExtractionContext::insertDeclaration(llvm::StringRef VarName,
+                                     SourceRange InitializerChars) const {
+  llvm::StringRef ExtractionCode = toSourceCode(SM, InitializerChars);
   const SourceLocation InsertionLoc =
       toHalfOpenFileRange(SM, Ctx.getLangOpts(),
                           InsertionPoint->getSourceRange())
@@ -193,6 +197,162 @@
   return tooling::Replacement(SM, InsertionLoc, 0, ExtractedVarDecl);
 }
 
+// Helpers for handling "binary subexpressions" like a + [[b + c]] + d.
+//
+// These are special, because the formal AST doesn't match what users expect:
+// - the AST is ((a + b) + c) + d, so the ancestor expression is `a + b + c`.
+// - but extracting `b + c` is reasonable, as + is (mathematically) associative.
+//
+// So we try to support these cases with some restrictions:
+//  - the operator must be associative
+//  - no mixing of operators is allowed
+//  - we don't look inside macro expansions in the subexpressions
+//  - we only adjust the extracted range, so references in the unselected parts
+//    of the AST expression (e.g. `a`) are still considered referenced for
+//    the purposes of calculating the insertion point.
+namespace {
+
+// Information extracted about a binary operator encounted in a SelectionTree.
+// It can represent either an overloaded or built-in operator.
+struct ParsedBinaryOperator {
+  BinaryOperatorKind Kind;
+  bool OperatorSelected = false;
+  llvm::SmallVector<const SelectionTree::Node*, 8> SelectedOperands;
+
+  // If N is a binary operator, populate this and return true.
+  bool parse(const SelectionTree::Node &N) {
+    if (const BinaryOperator *Op =
+        llvm::dyn_cast_or_null<BinaryOperator>(N.ASTNode.get<Expr>())) {
+      Kind = Op->getOpcode();
+      // FIXME: this could also be whitespace around the operand.
+      OperatorSelected = N.Selected;
+      SelectedOperands = N.Children;
+      return true;
+    }
+    if (const CXXOperatorCallExpr *Op =
+            llvm::dyn_cast_or_null<CXXOperatorCallExpr>(
+                N.ASTNode.get<Expr>())) {
+      if (Op->isInfixBinaryOp()) {
+        Kind = BinaryOperator::getOverloadedOpcode(Op->getOperator());
+        for (const auto* Child : N.Children) {
+          const Expr *E = Child->ASTNode.get<Expr>();
+          assert(E && "callee and args should be Exprs!");
+          if (E == Op->getArg(0) || E == Op->getArg(1))
+            SelectedOperands.push_back(Child);
+          else
+            OperatorSelected = true;
+        }
+        return true;
+      }
+    }
+    return false;
+  }
+
+  bool associative() const {
+    switch (Kind) {
+    case BO_Add:
+    case BO_Mul:
+    case BO_And:
+    case BO_Or:
+    case BO_Xor:
+    case BO_LAnd:
+    case BO_LOr:
+      return true;
+    default:
+      return false;
+    }
+  }
+};
+
+// If have an associative operator at the top level, then we must find
+// the start point (rightmost in LHS) and end point (leftmost in RHS).
+// We can only descend into subtrees where the operator matches and the
+// operator itself is not selected.
+//
+// e.g. for a + [[b + c]] + d
+//        +
+//       / \
+//  N-> +   d
+//     / \
+// X->+   c <- End
+//   / \
+//  a   b <- Start
+// It's critical that the operator marked X is unselected, otherwise the
+// correct start point would be X itself.
+const SelectionTree::Node *getEndpoint(const SelectionTree::Node &N,
+                                       BinaryOperatorKind OuterOp, bool IsStart,
+                                       const SourceManager &SM) {
+  // If N is not a binary operator of the desired type, we have to stop here.
+  ParsedBinaryOperator Op;
+  if (!Op.parse(N.ignoreImplicit()) || Op.Kind != OuterOp)
+    return &N;
+
+  // Don't support crossing macro boundaries, that way lies madness.
+  auto FileOf = [&](const SelectionTree::Node N) {
+    return SM.getFileID(N.ASTNode.getSourceRange().getBegin());
+  };
+  FileID F = FileOf(N);
+  for (const SelectionTree::Node* Child : Op.SelectedOperands)
+    if (FileOf(*Child) != F)
+      return &N;
+
+  // Assume IsStart is true, so we want the first chunk of the subexpr.
+  // Because we're in a subtree of a Op selection, and we've disallowed
+  // crossing macro boundaries, the selection is a suffix of N.
+  // this first chunk must also be selected.
+  //
+  // The cases:
+  // 1. Operator is selected. (Implies RHS is completely selected)
+  //    1a): RHS is selected but LHS is not. Return RHS.
+  //    1b): RHS and part of LHS is selected. Recurse into LHS to find start.
+  // 2. Operator is not selected. (Implies LHS is not selected)
+  //    2a): RHS is not selected: impossible, as selection would be empty.
+  //    2b): Some of RHS is selected. Recurse into RHS to find start.
+  //
+  // Because only the selected parts are in the SelectionTree, it's tricky to
+  // identify RHS/LHS (there can be 0/1/2 children).
+  // However knowing the selection is a suffix eliminates enough cases to make
+  // it possible.
+  //
+  // (If IsStart is false, we reverse LHS/RHS everywhere)
+  assert(!Op.SelectedOperands.empty() && "got only operator on one side!");
+  assert(Op.SelectedOperands.size() <= 2 && "Funny-shaped binary operator!");
+  if (Op.OperatorSelected) {
+    if (Op.SelectedOperands.size() == 1) // Case 1a
+      return Op.SelectedOperands.front();
+    // Case 1b
+    return getEndpoint(IsStart ? *Op.SelectedOperands.front()
+                               : *Op.SelectedOperands.back(),
+                       OuterOp, IsStart, SM);
+  } else {
+    assert(Op.SelectedOperands.size() == 1 &&
+           "selected both sides but not operator!");
+    return getEndpoint(*Op.SelectedOperands.front(), OuterOp, IsStart, SM);
+  }
+}
+
+} // namespace
+
+SourceRange ExtractionContext::getExtractionChars() const {
+  ParsedBinaryOperator Op;
+  // Special case: we're extracting an associative binary subexpression.
+  if (Op.parse(*ExprNode) && Op.associative() &&
+      Op.SelectedOperands.size() == 2) {
+    const SelectionTree::Node *LHS =
+        getEndpoint(*Op.SelectedOperands.front(), Op.Kind, true, SM);
+    const SelectionTree::Node *RHS =
+        getEndpoint(*Op.SelectedOperands.back(), Op.Kind, true, SM);
+    return SourceRange(toHalfOpenFileRange(SM, Ctx.getLangOpts(),
+                                           LHS->ASTNode.getSourceRange())
+                           ->getBegin(),
+                       toHalfOpenFileRange(SM, Ctx.getLangOpts(),
+                                           RHS->ASTNode.getSourceRange())
+                           ->getEnd());
+  }
+  // Usual case: we're extracting the whole expression.
+  return *toHalfOpenFileRange(SM, Ctx.getLangOpts(), Expr->getSourceRange());
+}
+
 /// Extracts an expression to the variable dummy
 /// Before:
 /// int x = 5 + 4 * 3;
@@ -230,11 +390,12 @@
   tooling::Replacements Result;
   // FIXME: get variable name from user or suggest based on type
   std::string VarName = "dummy";
+  SourceRange Range = Target->getExtractionChars();
   // insert new variable declaration
-  if (auto Err = Result.add(Target->insertDeclaration(VarName)))
+  if (auto Err = Result.add(Target->insertDeclaration(VarName, Range)))
     return std::move(Err);
   // replace expression with variable name
-  if (auto Err = Result.add(Target->replaceWithVar(VarName)))
+  if (auto Err = Result.add(Target->replaceWithVar(Range, VarName)))
     return std::move(Err);
   return Effect::applyEdit(Result);
 }
Index: clang-tools-extra/clangd/Selection.h
===================================================================
--- clang-tools-extra/clangd/Selection.h
+++ clang-tools-extra/clangd/Selection.h
@@ -96,6 +96,9 @@
     // Walk up the AST to get the DeclContext of this Node,
     // which is not the node itself.
     const DeclContext& getDeclContext() const;
+    // If this node is a wrapper with no syntax (e.g. implicit cast), return
+    // its contents. (If multiple wrappers are present, unwraps all of them).
+    const Node& ignoreImplicit() const;
   };
   // The most specific common ancestor of all the selected nodes.
   // If there is no selection, this is nullptr.
Index: clang-tools-extra/clangd/Selection.cpp
===================================================================
--- clang-tools-extra/clangd/Selection.cpp
+++ clang-tools-extra/clangd/Selection.cpp
@@ -430,5 +430,12 @@
   llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
 }
 
+const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
+  if (Children.size() == 1 &&
+      Children.front()->ASTNode.getSourceRange() == ASTNode.getSourceRange())
+    return Children.front()->ignoreImplicit();
+  return *this;
+}
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to