hokein updated this revision to Diff 231541.
hokein added a comment.

rebase


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D70594/new/

https://reviews.llvm.org/D70594

Files:
  clang-tools-extra/clangd/refactor/Rename.cpp
  clang-tools-extra/clangd/refactor/Rename.h
  clang-tools-extra/clangd/unittests/RenameTests.cpp

Index: clang-tools-extra/clangd/unittests/RenameTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/RenameTests.cpp
+++ clang-tools-extra/clangd/unittests/RenameTests.cpp
@@ -11,6 +11,7 @@
 #include "TestTU.h"
 #include "index/Ref.h"
 #include "refactor/Rename.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Tooling/Core/Replacement.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "gmock/gmock.h"
@@ -695,6 +696,111 @@
             expectedResult(Code, expectedResult(T, "abc")));
 }
 
+TEST(CrossFileRenameTests, RangePatching) {
+  struct {
+    llvm::StringRef LatestCode;
+    llvm::StringRef IndexCode;
+    RangeMatcher::MatchType ExpectedMatch;
+  } Tests [] = {
+    {
+      // Equal
+      R"([[foo]])",
+      R"([[foo]])",
+      RangeMatcher::MatchType::Subset,
+    },
+    {
+      // proper subset
+      R"([[foo]] [[foo]])",
+      R"([[foo]])",
+      RangeMatcher::MatchType::Subset,
+    },
+    {
+      // same line, near-miss column.
+      R"([[foo]] [[foo]])",
+      R"(  [[foo]] [[foo]])",
+      RangeMatcher::MatchType::NearMiss,
+    },
+    {
+      // same line, near-miss column.
+      R"([[foo]] [[foo]])",
+      R"([[foo]]   [[foo]])",
+      RangeMatcher::MatchType::NearMiss,
+    },
+    {
+      // same column, near-miss line
+      R"(
+        [[foo]] [[foo]]
+      )",
+      R"(
+
+        [[foo]] [[foo]]
+      )",
+      RangeMatcher::MatchType::NearMiss,
+    },
+
+
+    {
+      // same line, but column diff > near-miss threshold
+      R"([[foo]])",
+      R"(       [[foo]])",
+      RangeMatcher::MatchType::NoMatch,
+    },
+    {
+      // same column, but line diff > near-miss threshold
+      R"(
+        [[foo]] [[foo]]
+      )",
+      R"(
+
+
+
+        [[foo]] [[foo]]
+      )",
+      RangeMatcher::MatchType::NoMatch,
+    },
+    {
+      // two lines with different number of elements are not near-miss.
+      R"(
+        [[foo]] [[foo]]
+      )",
+      R"(
+        [[foo]]
+                [[foo]]
+      )",
+      RangeMatcher::MatchType::NoMatch,
+    }
+  };
+  LangOptions LangOpts;
+  LangOpts.CPlusPlus = true;
+  RangeMatcher Matcher(LangOpts);
+
+  for (const auto &T : Tests) {
+    auto IndexOccurrences = Annotations(T.IndexCode).ranges();
+    auto Candidates = Annotations(T.LatestCode).ranges();
+    EXPECT_EQ(T.ExpectedMatch,
+              RangeMatcher::match(IndexOccurrences, Candidates))
+        << T.LatestCode << ":" << T.IndexCode;
+
+
+    auto AuthoritativeResult = Matcher.getBest(Annotations(T.LatestCode).code(),
+                                               "foo", IndexOccurrences);
+    switch (T.ExpectedMatch) {
+    case RangeMatcher::MatchType::NoMatch:
+      EXPECT_EQ(llvm::None, AuthoritativeResult);
+      break;
+    case RangeMatcher::MatchType::Subset:
+      ASSERT_TRUE(AuthoritativeResult);
+      EXPECT_EQ(IndexOccurrences, *AuthoritativeResult);
+      break;
+    case RangeMatcher::MatchType::NearMiss:
+      ASSERT_TRUE(AuthoritativeResult);
+
+      EXPECT_EQ(Candidates, *AuthoritativeResult);
+      break;
+    }
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/refactor/Rename.h
===================================================================
--- clang-tools-extra/clangd/refactor/Rename.h
+++ clang-tools-extra/clangd/refactor/Rename.h
@@ -55,6 +55,49 @@
                                      std::vector<Range> Occurrences,
                                      llvm::StringRef NewName);
 
+/// Check whether the index results are good enough to perform the cross-file
+/// rename.
+///
+/// Index may be stale at the point of renaming, this class is designed to
+/// mitigate the issue of staleness index that may hurt the quality of rename
+/// results.
+///
+/// Techniques:
+///   - lex the latest file content (from dirty buffer/disk) to find all rename
+///     candidates, this yields a superset candidates
+///   - apply range patching heuristics to generate "authoritative" rename
+///     candidates. Scenarios that we are confident:
+///      (a) index returns a subset of candidates
+///        - fully equal, we are sure the index is up-to-date
+///        - proper subset, index is correct in most cases? there may be false
+///          positives (e.g. candidates got appended), but rename is still safe
+///      (b) index returns non-candidate results
+///        - check whether they are near-miss (same line/column)
+class RangeMatcher {
+public:
+  enum class MatchType : uint8_t {
+    NoMatch,
+
+    Subset,
+    NearMiss,
+  };
+  RangeMatcher(const LangOptions &LangOpts) : LangOpts(LangOpts) {}
+
+  /// Infers the \p IndexOccurrens is good enough, and returns the
+  /// "authoritative" candidates for renaming the \p Identifier in the \p Code.
+  /// Returns None if the \p IndexOccurrences is not good enough.
+  llvm::Optional<std::vector<Range>>
+  getBest(llvm::StringRef Code, llvm::StringRef Identifier,
+          std::vector<Range> IndexOccurrences);
+
+  /// Detects the two inputs are matched. This is an implementation detail of
+  /// \p getBest, exposing for testing only.
+  static MatchType match(llvm::ArrayRef<Range> LHS, llvm::ArrayRef<Range> RHS);
+
+private:
+  const LangOptions &LangOpts;
+};
+
 } // namespace clangd
 } // namespace clang
 
Index: clang-tools-extra/clangd/refactor/Rename.cpp
===================================================================
--- clang-tools-extra/clangd/refactor/Rename.cpp
+++ clang-tools-extra/clangd/refactor/Rename.cpp
@@ -16,11 +16,15 @@
 #include "index/SymbolCollector.h"
 #include "clang/AST/DeclCXX.h"
 #include "clang/AST/DeclTemplate.h"
+#include "clang/Basic/LangOptions.h"
 #include "clang/Basic/SourceLocation.h"
 #include "clang/Tooling/Refactoring/Rename/USRFindingAction.h"
+#include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/FormatVariadic.h"
+#include <algorithm>
+#include <cmath>
 
 namespace clang {
 namespace clangd {
@@ -328,8 +332,6 @@
 // as the file content we rename on, and fallback to file content on disk if
 // there is no dirty buffer.
 //
-// FIXME: add range patching heuristics to detect staleness of the index, and
-//        report to users.
 // FIXME: our index may return implicit references, which are non-eligitble
 //        for rename, we should filter out these references.
 llvm::Expected<FileEdits> renameOutsideFile(
@@ -341,6 +343,7 @@
   if (!AffectedFiles)
     return AffectedFiles.takeError();
   FileEdits Results;
+  RangeMatcher RMatcher(RenameDecl.getASTContext().getLangOpts());
   for (auto &FileAndOccurrences : *AffectedFiles) {
     llvm::StringRef FilePath = FileAndOccurrences.first();
 
@@ -349,9 +352,20 @@
       elog("Fail to read file content: {0}", AffectedFileCode.takeError());
       continue;
     }
+
+    auto RenameCandidates =
+        RMatcher.getBest(*AffectedFileCode, RenameDecl.getNameAsString(),
+                         std::move(FileAndOccurrences.second));
+    if (!RenameCandidates) {
+      return llvm::make_error<llvm::StringError>(
+          llvm::formatv("index results don't match the content of file {0}, "
+                        "the index may be stale.",
+                        FilePath),
+          llvm::inconvertibleErrorCode());
+    }
     auto RenameEdit =
         buildRenameEdit(FilePath, *AffectedFileCode,
-                        std::move(FileAndOccurrences.second), NewName);
+                        std::move(*RenameCandidates), NewName);
     if (!RenameEdit) {
       return llvm::make_error<llvm::StringError>(
           llvm::formatv("fail to build rename edit for file {0}: {1}", FilePath,
@@ -364,6 +378,102 @@
   return Results;
 }
 
+// Returns true if methods of Subset are all included in Superset.
+bool subset(llvm::ArrayRef<Range> Subset, llvm::ArrayRef<Range> Superset) {
+  assert(std::is_sorted(Subset.begin(), Subset.end()));
+  assert(std::is_sorted(Superset.begin(), Superset.end()));
+  auto ItSubset = Subset.begin();
+  auto ItSuperset = Superset.begin();
+  while (ItSubset != Subset.end() && ItSuperset != Superset.end()) {
+    if (*ItSuperset == *ItSubset) {
+      ++ItSubset;
+      ++ItSuperset;
+    } else if (*ItSuperset < *ItSubset) {
+      ++ItSuperset;
+    } else {
+      // we are sure Subset can't be a subset of Superset now, bail out.
+      break;
+    }
+  }
+  return ItSubset == Subset.end();
+}
+
+// A forward iterator for reading lines from a sorted array of "Range".
+class LineIterator {
+public:
+  explicit LineIterator(ArrayRef<Range> All) : All(All) {
+    CurrentLine = {All.begin(), 0ul};
+    advance();
+  }
+  // returns current line number.
+  int lineNumber() const { return LineNumber; }
+  bool isEnd() const { return CurrentLine.begin() == All.end(); }
+
+  const ArrayRef<Range> *operator->() const { return &CurrentLine; }
+  LineIterator &operator++() {
+    advance();
+    return *this;
+  }
+
+private:
+  void advance() {
+    auto NextStart = CurrentLine.end();
+    LineNumber = NextStart->start.line;
+    CurrentLine =
+        ArrayRef<Range>(NextStart, All.end()).take_while([&](const Range &R) {
+          return R.start.line == LineNumber;
+        });
+  }
+  int LineNumber = 0;
+  ArrayRef<Range> CurrentLine;
+  ArrayRef<Range> All;
+};
+
+// Check whether two lines are close enough to each other, they are considered
+// near-miss if
+//   1) same line number, and difference between columns is small, or
+//   2) same columns, difference between lines is small
+bool lineNearMiss(LineIterator LHS, LineIterator RHS) {
+  assert(!LHS.isEnd() && !RHS.isEnd());
+  // Bail out early if the number of elements is different.
+  if (LHS->size() != RHS->size())
+    return false;
+
+  static constexpr int KNearMissThrehold = 2;
+  // Same line, check whether columns are close enough.
+  if (LHS.lineNumber() == RHS.lineNumber()) {
+    return std::equal(LHS->begin(), LHS->end(), RHS->begin(),
+                      [&](const Range &R1, const Range &R2) {
+                        return abs(R1.start.character - R2.start.character) <=
+                               KNearMissThrehold;
+                      });
+  }
+  // Diff line, check whether the lines are close enough, and columns are
+  // the same.
+  if (abs(LHS.lineNumber() - RHS.lineNumber()) > KNearMissThrehold)
+    return false;
+  return std::equal(LHS->begin(), LHS->end(), RHS->begin(),
+                    [](const Range &R1, const Range &R2) {
+                      return R1.start.character == R2.start.character;
+                    });
+}
+// Check whether two inputs LHS and RHS are close enough to each other.
+// The LHS and RHS are considered near-miss if only if each line of them is
+// near-miss.
+bool nearMiss(ArrayRef<Range> LHS, ArrayRef<Range> RHS) {
+  if (LHS.size() != RHS.size())
+    return false;
+  LineIterator ItLHS(LHS);
+  LineIterator ItRHS(RHS);
+  while (!ItLHS.isEnd() && !ItRHS.isEnd()) {
+    if (!lineNearMiss(ItLHS, ItRHS))
+      return false;
+    ++ItLHS;
+    ++ItRHS;
+  }
+  return ItLHS.isEnd() && ItRHS.isEnd();
+}
+
 } // namespace
 
 llvm::Expected<FileEdits> rename(const RenameInputs &RInputs) {
@@ -499,5 +609,34 @@
   return Edit(InitialCode, std::move(RenameEdit));
 }
 
+llvm::Optional<std::vector<Range>>
+RangeMatcher::getBest(llvm::StringRef Code, llvm::StringRef Identifier,
+                      std::vector<Range> IndexOccurrences) {
+  std::vector<Range> Candidates =
+      collectIdentifierRanges(Identifier, Code, LangOpts);
+
+  llvm::sort(Candidates);
+  llvm::sort(IndexOccurrences);
+
+  auto M = match(IndexOccurrences, Candidates);
+  if (M == MatchType::Subset)
+    return std::move(IndexOccurrences); // RVO is disallowed for parameters.
+  if (M == MatchType::NearMiss)
+    return Candidates;
+  return None;
+}
+
+RangeMatcher::MatchType RangeMatcher::match(llvm::ArrayRef<Range> LHS,
+                                            llvm::ArrayRef<Range> RHS) {
+  assert(std::is_sorted(LHS.begin(), LHS.end()) && "LHS input must be sorted");
+  assert(std::is_sorted(RHS.begin(), RHS.end()) && "RHS input must be sorted");
+
+  if (subset(LHS, RHS))
+    return MatchType::Subset;
+  if (nearMiss(LHS, RHS))
+    return MatchType::NearMiss;
+  return MatchType::NoMatch;
+}
+
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to