[PATCH] D64475: [clangd] Duplicate lines of semantic highlightings sent removed.

Johan Vikström via Phabricator via cfe-commits Wed, 10 Jul 2019 04:31:28 -0700

jvikstrom created this revision.
jvikstrom added reviewers: hokein, sammccall, ilya-biryukov.
Herald added subscribers: cfe-commits, kadircet, arphaman, mgrang, jkorous, 
MaskRay.
Herald added a project: clang.


Added a class for diffing highlightings and removing duplicate lines. 
Integrated into the highlighting generation flow. Only works correctly if all 
tokens are on a single line. Also returns empty lines if the IDE should remove 
previous highlightings on a line.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D64475

Files:
  clang-tools-extra/clangd/ClangdServer.cpp
  clang-tools-extra/clangd/SemanticHighlighting.cpp
  clang-tools-extra/clangd/SemanticHighlighting.h
  clang-tools-extra/clangd/test/semantic-highlighting.test
  clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp

Index: clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
+++ clang-tools-extra/clangd/unittests/SemanticHighlightingTests.cpp
@@ -29,14 +29,17 @@
   return Tokens;
 }
 
-void checkHighlightings(llvm::StringRef Code) {
+std::tuple<ParsedAST, std::vector<HighlightingToken>,
+           std::vector<HighlightingToken>>
+getHighlightingsAnnotated(llvm::StringRef Code) {
   Annotations Test(Code);
   auto AST = TestTU::withCode(Test.code()).build();
   static const std::map<HighlightingKind, std::string> KindToString{
       {HighlightingKind::Variable, "Variable"},
       {HighlightingKind::Function, "Function"},
       {HighlightingKind::Class, "Class"},
-      {HighlightingKind::Enum, "Enum"}};
+      {HighlightingKind::Enum, "Enum"},
+      {HighlightingKind::Empty, "Empty"}};
   std::vector<HighlightingToken> ExpectedTokens;
   for (const auto &KindString : KindToString) {
     std::vector<HighlightingToken> Toks = makeHighlightingTokens(
@@ -45,6 +48,14 @@
   }
 
   auto ActualTokens = getSemanticHighlightings(AST);
+  return {std::move(AST), ActualTokens, ExpectedTokens};
+}
+
+void checkHighlightings(llvm::StringRef Code) {
+  std::vector<HighlightingToken> ActualTokens;
+  std::vector<HighlightingToken> ExpectedTokens;
+  std::tie(std::ignore, ActualTokens, ExpectedTokens) =
+      getHighlightingsAnnotated(Code);
   EXPECT_THAT(ActualTokens, testing::UnorderedElementsAreArray(ExpectedTokens));
 }
 
@@ -161,6 +172,68 @@
   EXPECT_EQ(ActualResults, ExpectedResults);
 }
 
+TEST(SemanticHighlighting, HighlightingDiffer) {
+  std::vector<std::vector<const char *>> TestCases{{
+                                                       R"cpp(
+    struct $Class[[Alpha]] {
+      double SomeVariable = 9483.301;
+    };
+    struct $Class[[Beta]] {};
+    int $Variable[[A]] = 121;
+    $Class[[Alpha]] $Variable[[Var]];
+  )cpp",
+                                                       R"cpp(
+    struct Alpha {
+      double SomeVariable = 9483.301;
+    };
+    struct Beta   {}; // Some changes that don't change the tokens' position.
+    intA$Empty[[]] = 121;
+    $Class[[Beta]] $Variable[[Var]];
+  )cpp"},
+                                                   {
+                                                       R"cpp(
+      int $Variable[[A]] = 121; int $Variable[[B]];
+    )cpp",
+                                                       R"cpp(
+      intA = 121; int $Variable[[B]];
+    )cpp"},
+                                                   {
+                                                       R"cpp(
+      int $Variable[[A]] = 213;
+    )cpp",
+                                                       R"cpp(
+      int C = 312;
+      int $Variable[[B]] = 213;
+    )cpp",
+                                                       R"cpp(
+      int C = 213;
+      int B = 412;
+    )cpp"},
+                                                   {
+                                                       R"cpp(
+      int $Variable[[A]];
+    )cpp",
+                                                       R"cpp(
+      int $Variable[[A]]; int $Variable[[B]];
+    )cpp"}};
+
+  for (auto Test : TestCases) {
+    HighlightingDiffer Differ;
+    for (const auto &Code : Test) {
+      ParsedAST AST =
+          TestTU::withCode("").build(); // Can't default construct a ParsedAST.
+      std::vector<HighlightingToken> CompleteTokens;
+      std::vector<HighlightingToken> ExpectedTokens;
+      std::tie(AST, CompleteTokens, ExpectedTokens) =
+          getHighlightingsAnnotated(Code);
+      std::vector<HighlightingToken> DiffedTokens =
+          Differ.diffHighlightings(AST, CompleteTokens);
+      EXPECT_THAT(DiffedTokens,
+                  testing::UnorderedElementsAreArray(ExpectedTokens));
+    }
+  }
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang
Index: clang-tools-extra/clangd/test/semantic-highlighting.test
===================================================================
--- clang-tools-extra/clangd/test/semantic-highlighting.test
+++ clang-tools-extra/clangd/test/semantic-highlighting.test
@@ -34,6 +34,21 @@
 # CHECK-NEXT:  }
 # CHECK-NEXT:}
 ---
+{"jsonrpc":"2.0","method":"textDocument/didOpen","params":{"textDocument":{"uri":"test:///foo.cpp","languageId":"cpp","version":1,"text":"int x = 2; int;\nint y = 2;"}}}
+#      CHECK:  "method": "textDocument/semanticHighlighting",
+# CHECK-NEXT:  "params": {
+# CHECK-NEXT:    "lines": [
+# CHECK-NEXT:      {
+# CHECK-NEXT:        "line": 1,
+# CHECK-NEXT:        "tokens": "AAAABAABAAA="
+# CHECK-NEXT:      }
+# CHECK-NEXT:   ],
+# CHECK-NEXT:    "textDocument": {
+# CHECK-NEXT:      "uri": "file:///clangd-test/foo.cpp"
+# CHECK-NEXT:    }
+# CHECK-NEXT:  }
+# CHECK-NEXT:}
+---
 {"jsonrpc":"2.0","id":3,"method":"shutdown"}
 ---
 {"jsonrpc":"2.0","method":"exit"}
Index: clang-tools-extra/clangd/SemanticHighlighting.h
===================================================================
--- clang-tools-extra/clangd/SemanticHighlighting.h
+++ clang-tools-extra/clangd/SemanticHighlighting.h
@@ -24,6 +24,7 @@
 namespace clangd {
 
 enum class HighlightingKind {
+  Empty = -1,
   Variable = 0,
   Function,
   Class,
@@ -39,6 +40,7 @@
 };
 
 bool operator==(const HighlightingToken &Lhs, const HighlightingToken &Rhs);
+bool operator!=(const HighlightingToken &Lhs, const HighlightingToken &Rhs);
 
 // Returns all HighlightingTokens from an AST. Only generates highlights for the
 // main AST.
@@ -52,6 +54,23 @@
 std::vector<SemanticHighlightingInformation>
 toSemanticHighlightingInformation(llvm::ArrayRef<HighlightingToken> Tokens);
 
+/// Class for getting the input highlighting lines that differ the previous
+/// highlightings.
+class HighlightingDiffer {
+  std::string PrevPath;
+  std::vector<HighlightingToken> PrevHighlightings;
+  std::mutex PrevMutex;
+
+public:
+  /// Removes every line where the \c Highlightings are the same as the
+  /// respective line in the previous highlightings this method was called with.
+  /// If the main AST file differs from the previous AST this was called with
+  /// the \c Highlightings are returned as is.
+  std::vector<HighlightingToken>
+  diffHighlightings(const ParsedAST &AST,
+                    std::vector<HighlightingToken> Highlightings);
+};
+
 } // namespace clangd
 } // namespace clang
 
Index: clang-tools-extra/clangd/SemanticHighlighting.cpp
===================================================================
--- clang-tools-extra/clangd/SemanticHighlighting.cpp
+++ clang-tools-extra/clangd/SemanticHighlighting.cpp
@@ -78,6 +78,8 @@
 
 private:
   void addToken(SourceLocation Loc, const NamedDecl *D) {
+    if (D->isInvalidDecl())
+      return;
     if (D->getDeclName().isIdentifier() && D->getName().empty())
       // Don't add symbols that don't have any length.
       return;
@@ -164,11 +166,125 @@
   llvm::support::endian::write16be(Buf.data(), I);
   OS.write(Buf.data(), Buf.size());
 }
+
+bool compareHighlightingToken(const HighlightingToken &Lhs,
+                              const HighlightingToken &Rhs) {
+  return Lhs.R.start.line == Rhs.R.start.line
+             ? Lhs.R.start.character < Rhs.R.start.character
+             : Lhs.R.start.line < Rhs.R.start.line;
+}
 } // namespace
 
+std::vector<HighlightingToken> HighlightingDiffer::diffHighlightings(
+    const ParsedAST &AST, std::vector<HighlightingToken> Highlightings) {
+  std::sort(Highlightings.begin(), Highlightings.end(),
+            compareHighlightingToken);
+
+  auto &SM = AST.getASTContext().getSourceManager();
+  const FileEntry *CurrentFileEntry = SM.getFileEntryForID(SM.getMainFileID());
+  std::string CurrentPath = CurrentFileEntry->tryGetRealPathName().str() +
+                            CurrentFileEntry->getName().str();
+  // FIXME: Want this to be LIFO and when a new highlighting request is added
+  // all the others in the queue should be removed. (Don't need to generate
+  // highlightings for old ASTs)
+  std::lock_guard<std::mutex> Lock(PrevMutex);
+  // The files are different therefore all highlights differ.
+  if (PrevPath != CurrentPath) {
+    PrevPath = CurrentPath;
+    PrevHighlightings = Highlightings;
+    return Highlightings;
+  }
+
+  // FIXME: Want to handle the case when there are only one or multiple new
+  // lines in a better way than saying all highlightings differ as the IDE
+  // should handle that case.
+  std::vector<HighlightingToken> HighlightingsCopy =
+      Highlightings; // Copy the original sorted highlightings to save as
+                     // previous highlightings at the end.
+  // The invariant for this loop is: all Highlightings with index < I are on a
+  // line where there is a difference in any token on the line between
+  // PrevHighlightings and Highlightings.
+  int I = 0, PI = 0, EndI = Highlightings.size(),
+      EndP = PrevHighlightings.size();
+  while (I < EndI && PI < EndP) {
+    const HighlightingToken &Current = Highlightings[I];
+    const HighlightingToken &Prev = PrevHighlightings[PI];
+    // FIXME: There's an edge case when tokens span multiple lines. If the first
+    // token on the line started on a line above the current one and the rest of
+    // the line is the equal to the previous one than we will remove all
+    // highlights but the ones for the token spanning multiple lines. This means
+    // that when we get into the LSP layer the only highlights that will be
+    // visible are the ones for the token spanning multiple lines.
+    // Example:
+    // EndOfMultilineToken  Token Token Token
+    // If "Token Token Token" don't differ from previously the line is
+    // incorrectly removed.
+
+    if (Current.R.start.line == Prev.R.start.line) {
+      // Find all other tokens starting on this line and if none differ and the
+      // lines have equal length, remove this line from the highlightings.
+      int PrevLineEndIdx = PI + 1;
+      int CurrentLineEndIdx = I + 1;
+      // FIXME: The edge case about tokens spanning multiple lines exists here
+      // as well only this time there will be no highlightings for the token
+      // spanning multiple lines if this line is differing.
+
+      // Find the end index of the current and previous lines.
+      while (CurrentLineEndIdx < EndI &&
+             Highlightings[CurrentLineEndIdx].R.start.line == Prev.R.start.line)
+        ++CurrentLineEndIdx;
+      while (PrevLineEndIdx < EndP &&
+             PrevHighlightings[PrevLineEndIdx].R.start.line ==
+                 Prev.R.start.line)
+        ++PrevLineEndIdx;
+
+      int PrevLen = PrevLineEndIdx - PI;
+      int CurrentLen = CurrentLineEndIdx - I;
+      if (PrevLen == CurrentLen) {
+        bool LineDiffers = false;
+        for (; I < CurrentLineEndIdx; ++I, ++PI)
+          if (Highlightings[I] != PrevHighlightings[PI])
+            LineDiffers = true;
+
+        if (!LineDiffers) {
+          Highlightings.erase(Highlightings.begin() + I - CurrentLen,
+                              Highlightings.begin() + I);
+          // Move I back to the position it should be in the highlightings
+          // vector so no tokens are skipped because of the erasing.
+          CurrentLineEndIdx -= CurrentLen;
+          // Don't want to start processing any potential pushed Empty tokens.
+          EndI -= CurrentLen;
+        }
+      }
+
+      // No tokens on this line has to be processed again as any duplicates on
+      // this line have been removed.
+      I = CurrentLineEndIdx;
+      PI = PrevLineEndIdx;
+    } else if (Prev.R.start.line < Current.R.start.line) {
+      // There is a token in the previous highlightings that do not exist in the
+      // new highlightings. This token must be replaced with an empty token in
+      // the highlightings. Otherwise IDEs might have cached the old incorrect
+      // token and will start displaying incorrect highlightings.
+      Highlightings.push_back(
+          {HighlightingKind::Empty, {Prev.R.start, Prev.R.start}});
+      PI++;
+    } else
+      // The token does not exist in the previous highlightings.
+      I++;
+  }
+
+  PrevHighlightings = HighlightingsCopy;
+  PrevPath = CurrentPath;
+  return Highlightings;
+}
+
 bool operator==(const HighlightingToken &Lhs, const HighlightingToken &Rhs) {
   return Lhs.Kind == Rhs.Kind && Lhs.R == Rhs.R;
 }
+bool operator!=(const HighlightingToken &Lhs, const HighlightingToken &Rhs) {
+  return !(Lhs == Rhs);
+}
 
 std::vector<HighlightingToken> getSemanticHighlightings(ParsedAST &AST) {
   return HighlightingTokenCollector(AST).collectTokens();
@@ -191,6 +307,12 @@
     llvm::SmallVector<char, 128> LineByteTokens;
     llvm::raw_svector_ostream OS(LineByteTokens);
     for (const auto &Token : Line.second) {
+      // If a token is empty we need to ensure that the line exists in the
+      // SemanticHighlightingInformation to clear any previous IDE highlighting
+      // on that line. However if there are other tokens on the line it should
+      // not influence the highlighting on the line.
+      if (Token.Kind == HighlightingKind::Empty)
+        continue;
       // Writes the token to LineByteTokens in the byte format specified by the
       // LSP proposal. Described below.
       // |<---- 4 bytes ---->|<-- 2 bytes -->|<--- 2 bytes -->|
@@ -218,6 +340,8 @@
     return "entity.name.type.class.cpp";
   case HighlightingKind::Enum:
     return "entity.name.type.enum.cpp";
+  case HighlightingKind::Empty:
+    llvm_unreachable("must not pass Empty to the function");
   case HighlightingKind::NumKinds:
     llvm_unreachable("must not pass NumKinds to the function");
   }
Index: clang-tools-extra/clangd/ClangdServer.cpp
===================================================================
--- clang-tools-extra/clangd/ClangdServer.cpp
+++ clang-tools-extra/clangd/ClangdServer.cpp
@@ -64,7 +64,7 @@
     if (FIndex)
       FIndex->updateMain(Path, AST);
     if (SemanticHighlighting)
-      DiagConsumer.onHighlightingsReady(Path, getSemanticHighlightings(AST));
+      DiagConsumer.onHighlightingsReady(Path, Differ.diffHighlightings(AST, getSemanticHighlightings(AST)));
   }
 
   void onDiagnostics(PathRef File, std::vector<Diag> Diags) override {
@@ -79,6 +79,7 @@
   FileIndex *FIndex;
   DiagnosticsConsumer &DiagConsumer;
   bool SemanticHighlighting;
+  HighlightingDiffer Differ;
 };
 } // namespace

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D64475: [clangd] Duplicate lines of semantic highlightings sent removed.

Reply via email to