This revision was automatically updated to reflect the committed changes.
Closed by commit rL332378: [clangd] Extract scoring/ranking logic, and shave 
yaks. (authored by sammccall, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D46524?vs=146591&id=146881#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D46524

Files:
  clang-tools-extra/trunk/clangd/CMakeLists.txt
  clang-tools-extra/trunk/clangd/CodeComplete.cpp
  clang-tools-extra/trunk/clangd/Quality.cpp
  clang-tools-extra/trunk/clangd/Quality.h
  clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
  clang-tools-extra/trunk/unittests/clangd/ClangdUnitTests.cpp
  clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
  clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp
  clang-tools-extra/trunk/unittests/clangd/TestFS.cpp
  clang-tools-extra/trunk/unittests/clangd/TestTU.cpp
  clang-tools-extra/trunk/unittests/clangd/TestTU.h
  clang-tools-extra/trunk/unittests/clangd/XRefsTests.cpp

Index: clang-tools-extra/trunk/clangd/CodeComplete.cpp
===================================================================
--- clang-tools-extra/trunk/clangd/CodeComplete.cpp
+++ clang-tools-extra/trunk/clangd/CodeComplete.cpp
@@ -20,6 +20,7 @@
 #include "FuzzyMatch.h"
 #include "Headers.h"
 #include "Logger.h"
+#include "Quality.h"
 #include "SourceCode.h"
 #include "Trace.h"
 #include "URI.h"
@@ -34,6 +35,9 @@
 #include "llvm/Support/Format.h"
 #include <queue>
 
+// We log detailed candidate here if you run with -debug-only=codecomplete.
+#define DEBUG_TYPE "codecomplete"
+
 namespace clang {
 namespace clangd {
 namespace {
@@ -192,35 +196,6 @@
   return Result;
 }
 
-// Produces an integer that sorts in the same order as F.
-// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
-uint32_t encodeFloat(float F) {
-  static_assert(std::numeric_limits<float>::is_iec559, "");
-  static_assert(sizeof(float) == sizeof(uint32_t), "");
-  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
-
-  // Get the bits of the float. Endianness is the same as for integers.
-  uint32_t U;
-  memcpy(&U, &F, sizeof(float));
-  // IEEE 754 floats compare like sign-magnitude integers.
-  if (U & TopBit)    // Negative float.
-    return 0 - U;    // Map onto the low half of integers, order reversed.
-  return U + TopBit; // Positive floats map onto the high half of integers.
-}
-
-// Returns a string that sorts in the same order as (-Score, Name), for LSP.
-std::string sortText(float Score, llvm::StringRef Name) {
-  // We convert -Score to an integer, and hex-encode for readability.
-  // Example: [0.5, "foo"] -> "41000000foo"
-  std::string S;
-  llvm::raw_string_ostream OS(S);
-  write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
-            /*Width=*/2 * sizeof(Score));
-  OS << Name;
-  OS.flush();
-  return S;
-}
-
 /// Creates a `HeaderFile` from \p Header which can be either a URI or a literal
 /// include.
 static llvm::Expected<HeaderFile> toHeaderFile(StringRef Header,
@@ -251,33 +226,6 @@
   const CodeCompletionResult *SemaResult = nullptr;
   const Symbol *IndexResult = nullptr;
 
-  // Computes the "symbol quality" score for this completion. Higher is better.
-  float score() const {
-    float Score = 1;
-    if (IndexResult)
-      Score *= quality(*IndexResult);
-    if (SemaResult) {
-      // For now we just use the Sema priority, mapping it onto a 0-2 interval.
-      // That makes 1 neutral-ish, so we don't reward/penalize non-Sema results.
-      // Priority 80 is a really bad score.
-      Score *= 2 - std::min<float>(80, SemaResult->Priority) / 40;
-
-      switch (static_cast<CXAvailabilityKind>(SemaResult->Availability)) {
-      case CXAvailability_Available:
-        // No penalty.
-        break;
-      case CXAvailability_Deprecated:
-        Score *= 0.1f;
-        break;
-      case CXAvailability_NotAccessible:
-      case CXAvailability_NotAvailable:
-        Score = 0;
-        break;
-      }
-    }
-    return Score;
-  }
-
   // Builds an LSP completion item.
   CompletionItem build(StringRef FileName, const CompletionItemScores &Scores,
                        const CodeCompleteOptions &Opts,
@@ -346,6 +294,7 @@
     return I;
   }
 };
+using ScoredCandidate = std::pair<CompletionCandidate, CompletionItemScores>;
 
 // Determine the symbol ID for a Sema code completion result, if possible.
 llvm::Optional<SymbolID> getSymbolID(const CodeCompletionResult &R) {
@@ -552,50 +501,12 @@
   UniqueFunction<void()> ResultsCallback;
 };
 
-// Tracks a bounded number of candidates with the best scores.
-class TopN {
-public:
-  using value_type = std::pair<CompletionCandidate, CompletionItemScores>;
-  static constexpr size_t Unbounded = std::numeric_limits<size_t>::max();
-
-  TopN(size_t N) : N(N) {}
-
-  // Adds a candidate to the set.
-  // Returns true if a candidate was dropped to get back under N.
-  bool push(value_type &&V) {
-    bool Dropped = false;
-    if (Heap.size() >= N) {
-      Dropped = true;
-      if (N > 0 && greater(V, Heap.front())) {
-        std::pop_heap(Heap.begin(), Heap.end(), greater);
-        Heap.back() = std::move(V);
-        std::push_heap(Heap.begin(), Heap.end(), greater);
-      }
-    } else {
-      Heap.push_back(std::move(V));
-      std::push_heap(Heap.begin(), Heap.end(), greater);
-    }
-    assert(Heap.size() <= N);
-    assert(std::is_heap(Heap.begin(), Heap.end(), greater));
-    return Dropped;
-  }
-
-  // Returns candidates from best to worst.
-  std::vector<value_type> items() && {
-    std::sort_heap(Heap.begin(), Heap.end(), greater);
-    assert(Heap.size() <= N);
-    return std::move(Heap);
-  }
-
-private:
-  static bool greater(const value_type &L, const value_type &R) {
+struct ScoredCandidateGreater {
+  bool operator()(const ScoredCandidate &L, const ScoredCandidate &R) {
     if (L.second.finalScore != R.second.finalScore)
       return L.second.finalScore > R.second.finalScore;
     return L.first.Name < R.first.Name; // Earlier name is better.
   }
-
-  const size_t N;
-  std::vector<value_type> Heap; // Min-heap, comparator is greater().
 };
 
 class SignatureHelpCollector final : public CodeCompleteConsumer {
@@ -1035,7 +946,8 @@
                const SymbolSlab &IndexResults) {
     trace::Span Tracer("Merge and score results");
     // We only keep the best N results at any time, in "native" format.
-    TopN Top(Opts.Limit == 0 ? TopN::Unbounded : Opts.Limit);
+    TopN<ScoredCandidate, ScoredCandidateGreater> Top(
+        Opts.Limit == 0 ? std::numeric_limits<size_t>::max() : Opts.Limit);
     llvm::DenseSet<const Symbol *> UsedIndexResults;
     auto CorrespondingIndexResult =
         [&](const CodeCompletionResult &SemaResult) -> const Symbol * {
@@ -1061,23 +973,42 @@
   }
 
   // Scores a candidate and adds it to the TopN structure.
-  void addCandidate(TopN &Candidates, const CodeCompletionResult *SemaResult,
+  void addCandidate(TopN<ScoredCandidate, ScoredCandidateGreater> &Candidates,
+                    const CodeCompletionResult *SemaResult,
                     const Symbol *IndexResult) {
     CompletionCandidate C;
     C.SemaResult = SemaResult;
     C.IndexResult = IndexResult;
     C.Name = IndexResult ? IndexResult->Name : Recorder->getName(*SemaResult);
 
-    CompletionItemScores Scores;
+    SymbolQualitySignals Quality;
+    SymbolRelevanceSignals Relevance;
     if (auto FuzzyScore = Filter->match(C.Name))
-      Scores.filterScore = *FuzzyScore;
+      Relevance.NameMatch = *FuzzyScore;
     else
       return;
-    Scores.symbolScore = C.score();
-    // We score candidates by multiplying symbolScore ("quality" of the result)
-    // with filterScore (how well it matched the query).
-    // This is sensitive to the distribution of both component scores!
-    Scores.finalScore = Scores.filterScore * Scores.symbolScore;
+    if (IndexResult)
+      Quality.merge(*IndexResult);
+    if (SemaResult) {
+      Quality.merge(*SemaResult);
+      Relevance.merge(*SemaResult);
+    }
+
+    float QualScore = Quality.evaluate(), RelScore = Relevance.evaluate();
+    CompletionItemScores Scores;
+    Scores.finalScore = evaluateSymbolAndRelevance(QualScore, RelScore);
+    // The purpose of exporting component scores is to allow NameMatch to be
+    // replaced on the client-side. So we export (NameMatch, final/NameMatch)
+    // rather than (RelScore, QualScore).
+    Scores.filterScore = Relevance.NameMatch;
+    Scores.symbolScore =
+        Scores.filterScore ? Scores.finalScore / Scores.filterScore : QualScore;
+
+    DEBUG(llvm::dbgs() << "CodeComplete: " << C.Name
+                       << (IndexResult ? " (index)" : "")
+                       << (SemaResult ? " (sema)" : "") << " = "
+                       << Scores.finalScore << "\n"
+                       << Quality << Relevance << "\n");
 
     NSema += bool(SemaResult);
     NIndex += bool(IndexResult);
Index: clang-tools-extra/trunk/clangd/Quality.h
===================================================================
--- clang-tools-extra/trunk/clangd/Quality.h
+++ clang-tools-extra/trunk/clangd/Quality.h
@@ -0,0 +1,126 @@
+//===--- Quality.h - Ranking alternatives for ambiguous queries -*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+///
+/// Some operations such as code completion produce a set of candidates.
+/// Usually the user can choose between them, but we should put the best options
+/// at the top (they're easier to select, and more likely to be seen).
+///
+/// This file defines building blocks for ranking candidates.
+/// It's used by the features directly and also in the implementation of
+/// indexes, as indexes also need to heuristically limit their results.
+///
+/// The facilities here are:
+///   - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
+///     These are structured in a way that they can be debugged, and are fairly
+///     consistent regardless of the source.
+///   - compute scores from scoring signals. These are suitable for sorting.
+///   - sorting utilities like the TopN container.
+/// These could be split up further to isolate dependencies if we care.
+///
+//===---------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
+#include "llvm/ADT/StringRef.h"
+#include <algorithm>
+#include <functional>
+#include <vector>
+namespace llvm {
+class raw_ostream;
+}
+namespace clang {
+class CodeCompletionResult;
+namespace clangd {
+struct Symbol;
+
+// Signals structs are designed to be aggregated from 0 or more sources.
+// A default instance has neutral signals, and sources are merged into it.
+// They can be dumped for debugging, and evaluate()d into a score.
+
+/// Attributes of a symbol that affect how much we like it.
+struct SymbolQualitySignals {
+  unsigned SemaCCPriority = 0; // 1-80, 1 is best. 0 means absent.
+                               // FIXME: this is actually a mix of symbol
+                               //        quality and relevance. Untangle this.
+  bool Deprecated = false;
+  unsigned References = 0;
+
+  void merge(const CodeCompletionResult &SemaCCResult);
+  void merge(const Symbol &IndexResult);
+
+  // Condense these signals down to a single number, higher is better.
+  float evaluate() const;
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                              const SymbolQualitySignals &);
+
+/// Attributes of a symbol-query pair that affect how much we like it.
+struct SymbolRelevanceSignals {
+  // 0-1 fuzzy-match score for unqualified name. Must be explicitly assigned.
+  float NameMatch = 1;
+  bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
+
+  void merge(const CodeCompletionResult &SemaResult);
+
+  // Condense these signals down to a single number, higher is better.
+  float evaluate() const;
+};
+llvm::raw_ostream &operator<<(llvm::raw_ostream &,
+                              const SymbolRelevanceSignals &);
+
+/// Combine symbol quality and relevance into a single score.
+float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
+
+/// TopN<T> is a lossy container that preserves only the "best" N elements.
+template <typename T, typename Compare = std::greater<T>> class TopN {
+public:
+  using value_type = T;
+  TopN(size_t N, Compare Greater = Compare())
+      : N(N), Greater(std::move(Greater)) {}
+
+  // Adds a candidate to the set.
+  // Returns true if a candidate was dropped to get back under N.
+  bool push(value_type &&V) {
+    bool Dropped = false;
+    if (Heap.size() >= N) {
+      Dropped = true;
+      if (N > 0 && Greater(V, Heap.front())) {
+        std::pop_heap(Heap.begin(), Heap.end(), Greater);
+        Heap.back() = std::move(V);
+        std::push_heap(Heap.begin(), Heap.end(), Greater);
+      }
+    } else {
+      Heap.push_back(std::move(V));
+      std::push_heap(Heap.begin(), Heap.end(), Greater);
+    }
+    assert(Heap.size() <= N);
+    assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
+    return Dropped;
+  }
+
+  // Returns candidates from best to worst.
+  std::vector<value_type> items() && {
+    std::sort_heap(Heap.begin(), Heap.end(), Greater);
+    assert(Heap.size() <= N);
+    return std::move(Heap);
+  }
+
+private:
+  const size_t N;
+  std::vector<value_type> Heap; // Min-heap, comparator is Greater.
+  Compare Greater;
+};
+
+/// Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
+/// (The highest score compares smallest so it sorts at the top).
+std::string sortText(float Score, llvm::StringRef Tiebreak = "");
+
+} // namespace clangd
+} // namespace clang
+
+#endif
Index: clang-tools-extra/trunk/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/trunk/clangd/CMakeLists.txt
+++ clang-tools-extra/trunk/clangd/CMakeLists.txt
@@ -28,6 +28,7 @@
   Logger.cpp
   Protocol.cpp
   ProtocolHandlers.cpp
+  Quality.cpp
   SourceCode.cpp
   Threading.cpp
   Trace.cpp
Index: clang-tools-extra/trunk/clangd/Quality.cpp
===================================================================
--- clang-tools-extra/trunk/clangd/Quality.cpp
+++ clang-tools-extra/trunk/clangd/Quality.cpp
@@ -0,0 +1,108 @@
+//===--- Quality.cpp --------------------------------------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+#include "Quality.h"
+#include "index/Index.h"
+#include "clang/Sema/CodeCompleteConsumer.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace clangd {
+using namespace llvm;
+
+void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
+  SemaCCPriority = SemaCCResult.Priority;
+
+  if (SemaCCResult.Availability == CXAvailability_Deprecated)
+    Deprecated = true;
+}
+
+void SymbolQualitySignals::merge(const Symbol &IndexResult) {
+  References = std::max(IndexResult.References, References);
+}
+
+float SymbolQualitySignals::evaluate() const {
+  float Score = 1;
+
+  // This avoids a sharp gradient for tail symbols, and also neatly avoids the
+  // question of whether 0 references means a bad symbol or missing data.
+  if (References >= 3)
+    Score *= std::log(References);
+
+  if (SemaCCPriority)
+    // Map onto a 0-2 interval, so we don't reward/penalize non-Sema results.
+    // Priority 80 is a really bad score.
+    Score *= 2 - std::min<float>(80, SemaCCPriority) / 40;
+
+  if (Deprecated)
+    Score *= 0.1;
+
+  return Score;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SymbolQualitySignals &S) {
+  OS << formatv("=== Symbol quality: {0}\n", S.evaluate());
+  if (S.SemaCCPriority)
+    OS << formatv("\tSemaCCPriority: {0}\n", S.SemaCCPriority);
+  OS << formatv("\tReferences: {0}\n", S.References);
+  OS << formatv("\tDeprecated: {0}\n", S.Deprecated);
+  return OS;
+}
+
+void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
+  if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
+      SemaCCResult.Availability == CXAvailability_NotAccessible)
+    Forbidden = true;
+}
+
+float SymbolRelevanceSignals::evaluate() const {
+  if (Forbidden)
+    return 0;
+  return NameMatch;
+}
+raw_ostream &operator<<(raw_ostream &OS, const SymbolRelevanceSignals &S) {
+  OS << formatv("=== Symbol relevance: {0}\n", S.evaluate());
+  OS << formatv("\tName match: {0}\n", S.NameMatch);
+  OS << formatv("\tForbidden: {0}\n", S.Forbidden);
+  return OS;
+}
+
+float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
+  return SymbolQuality * SymbolRelevance;
+}
+
+// Produces an integer that sorts in the same order as F.
+// That is: a < b <==> encodeFloat(a) < encodeFloat(b).
+static uint32_t encodeFloat(float F) {
+  static_assert(std::numeric_limits<float>::is_iec559, "");
+  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
+
+  // Get the bits of the float. Endianness is the same as for integers.
+  uint32_t U = FloatToBits(F);
+  // IEEE 754 floats compare like sign-magnitude integers.
+  if (U & TopBit)    // Negative float.
+    return 0 - U;    // Map onto the low half of integers, order reversed.
+  return U + TopBit; // Positive floats map onto the high half of integers.
+}
+
+std::string sortText(float Score, llvm::StringRef Name) {
+  // We convert -Score to an integer, and hex-encode for readability.
+  // Example: [0.5, "foo"] -> "41000000foo"
+  std::string S;
+  llvm::raw_string_ostream OS(S);
+  write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
+            /*Width=*/2 * sizeof(Score));
+  OS << Name;
+  OS.flush();
+  return S;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/trunk/unittests/clangd/TestTU.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/TestTU.cpp
+++ clang-tools-extra/trunk/unittests/clangd/TestTU.cpp
@@ -0,0 +1,95 @@
+//===--- TestTU.cpp - Scratch source files for testing ------------*-
+//C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+#include "TestTU.h"
+#include "TestFS.h"
+#include "index/FileIndex.h"
+#include "index/MemIndex.h"
+#include "clang/Frontend/CompilerInvocation.h"
+#include "clang/Frontend/PCHContainerOperations.h"
+#include "clang/Frontend/Utils.h"
+
+namespace clang {
+namespace clangd {
+using namespace llvm;
+
+ParsedAST TestTU::build() const {
+  std::string FullFilename = testPath(Filename),
+              FullHeaderName = testPath(HeaderFilename);
+  std::vector<const char *> Cmd = {"clang", FullFilename.c_str()};
+  // FIXME: this shouldn't need to be conditional, but it breaks a
+  // GoToDefinition test for some reason (getMacroArgExpandedLocation fails).
+  if (!HeaderCode.empty()) {
+    Cmd.push_back("-include");
+    Cmd.push_back(FullHeaderName.c_str());
+  }
+  auto AST = ParsedAST::Build(
+      createInvocationFromCommandLine(Cmd), nullptr,
+      MemoryBuffer::getMemBufferCopy(Code),
+      std::make_shared<PCHContainerOperations>(),
+      buildTestFS({{FullFilename, Code}, {FullHeaderName, HeaderCode}}));
+  if (!AST.hasValue()) {
+    ADD_FAILURE() << "Failed to build code:\n" << Code;
+    llvm_unreachable("Failed to build TestTU!");
+  }
+  return std::move(*AST);
+}
+
+SymbolSlab TestTU::headerSymbols() const {
+  auto AST = build();
+  return indexAST(&AST);
+}
+
+std::unique_ptr<SymbolIndex> TestTU::index() const {
+  return MemIndex::build(headerSymbols());
+}
+
+// Look up a symbol by qualified name, which must be unique.
+const Symbol &findSymbol(const SymbolSlab &Slab, llvm::StringRef QName) {
+  const Symbol *Result = nullptr;
+  for (const Symbol &S : Slab) {
+    if (QName != (S.Scope + S.Name).str())
+      continue;
+    if (Result) {
+      ADD_FAILURE() << "Multiple symbols named " << QName << ":\n"
+                    << *Result << "\n---\n"
+                    << S;
+      assert(false && "QName is not unique");
+    }
+    Result = &S;
+  }
+  if (!Result) {
+    ADD_FAILURE() << "No symbol named " << QName << " in "
+                  << ::testing::PrintToString(Slab);
+    assert(false && "No symbol with QName");
+  }
+  return *Result;
+}
+
+const NamedDecl &findDecl(ParsedAST &AST, llvm::StringRef QName) {
+  const NamedDecl *Result = nullptr;
+  for (const Decl *D : AST.getTopLevelDecls()) {
+    auto *ND = dyn_cast<NamedDecl>(D);
+    if (!ND || ND->getNameAsString() != QName)
+      continue;
+    if (Result) {
+      ADD_FAILURE() << "Multiple Decls named " << QName;
+      assert(false && "QName is not unique");
+    }
+    Result = ND;
+  }
+  if (!Result) {
+    ADD_FAILURE() << "No Decl named " << QName << " in AST";
+    assert(false && "No Decl with QName");
+  }
+  return *Result;
+}
+
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/trunk/unittests/clangd/ClangdUnitTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/ClangdUnitTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/ClangdUnitTests.cpp
@@ -10,10 +10,7 @@
 #include "Annotations.h"
 #include "ClangdUnit.h"
 #include "SourceCode.h"
-#include "TestFS.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/PCHContainerOperations.h"
-#include "clang/Frontend/Utils.h"
+#include "TestTU.h"
 #include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
@@ -36,24 +33,6 @@
   return Field(&Diag::Notes, ElementsAre(NoteMatcher));
 }
 
-// FIXME: this is duplicated with FileIndexTests. Share it.
-ParsedAST build(StringRef Code, std::vector<const char *> Flags = {}) {
-  std::vector<const char *> Cmd = {"clang", "main.cpp"};
-  Cmd.insert(Cmd.begin() + 1, Flags.begin(), Flags.end());
-  auto CI = createInvocationFromCommandLine(Cmd);
-  auto Buf = MemoryBuffer::getMemBuffer(Code);
-  auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf),
-                              std::make_shared<PCHContainerOperations>(),
-                              vfs::getRealFileSystem());
-  assert(AST.hasValue());
-  return std::move(*AST);
-}
-
-std::vector<Diag> buildDiags(llvm::StringRef Code,
-                             std::vector<const char *> Flags = {}) {
-  return build(Code, std::move(Flags)).getDiagnostics();
-}
-
 MATCHER_P2(Diag, Range, Message,
            "Diag at " + llvm::to_string(Range) + " = [" + Message + "]") {
   return arg.Range == Range && arg.Message == Message;
@@ -105,7 +84,7 @@
     }
   )cpp");
   EXPECT_THAT(
-      buildDiags(Test.code()),
+      TestTU::withCode(Test.code()).build().getDiagnostics(),
       ElementsAre(
           // This range spans lines.
           AllOf(Diag(Test.range("typo"),
@@ -123,13 +102,15 @@
 
 TEST(DiagnosticsTest, FlagsMatter) {
   Annotations Test("[[void]] main() {}");
-  EXPECT_THAT(buildDiags(Test.code()),
+  auto TU = TestTU::withCode(Test.code());
+  EXPECT_THAT(TU.build().getDiagnostics(),
               ElementsAre(AllOf(Diag(Test.range(), "'main' must return 'int'"),
                                 WithFix(Fix(Test.range(), "int",
                                             "change 'void' to 'int'")))));
   // Same code built as C gets different diagnostics.
+  TU.Filename = "Plain.c";
   EXPECT_THAT(
-      buildDiags(Test.code(), {"-x", "c"}),
+      TU.build().getDiagnostics(),
       ElementsAre(AllOf(
           Diag(Test.range(), "return type of 'main' is not 'int'"),
           WithFix(Fix(Test.range(), "int", "change return type to 'int'")))));
@@ -150,7 +131,7 @@
     #endif
     )cpp");
   EXPECT_THAT(
-      buildDiags(Test.code()),
+      TestTU::withCode(Test.code()).build().getDiagnostics(),
       ElementsAre(Diag(Test.range(), "use of undeclared identifier 'b'")));
 }
 
@@ -229,7 +210,7 @@
            "int ^λλ^λ();", // UTF-8 handled properly when backing up
        }) {
     Annotations TestCase(Text);
-    auto AST = build(TestCase.code());
+    auto AST = TestTU::withCode(TestCase.code()).build();
     const auto &SourceMgr = AST.getASTContext().getSourceManager();
     SourceLocation Actual = getBeginningOfIdentifier(
         AST, TestCase.points().back(), SourceMgr.getMainFileID());
Index: clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/QualityTests.cpp
@@ -0,0 +1,123 @@
+//===-- SourceCodeTests.cpp  ------------------------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Evaluating scoring functions isn't a great fit for assert-based tests.
+// For interesting cases, both exact scores and "X beats Y" are too brittle to
+// make good hard assertions.
+//
+// Here we test the signal extraction and sanity-check that signals point in
+// the right direction. This should be supplemented by quality metrics which
+// we can compute from a corpus of queries and preferred rankings.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Quality.h"
+#include "TestTU.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+TEST(QualityTests, SymbolQualitySignalExtraction) {
+  auto Header = TestTU::withHeaderCode(R"cpp(
+    int x;
+
+    [[deprecated]]
+    int f() { return x; }
+  )cpp");
+  auto Symbols = Header.headerSymbols();
+  auto AST = Header.build();
+
+  SymbolQualitySignals Quality;
+  Quality.merge(findSymbol(Symbols, "x"));
+  EXPECT_FALSE(Quality.Deprecated);
+  EXPECT_EQ(Quality.SemaCCPriority, SymbolQualitySignals().SemaCCPriority);
+  EXPECT_EQ(Quality.References, SymbolQualitySignals().References);
+
+  Symbol F = findSymbol(Symbols, "f");
+  F.References = 24; // TestTU doesn't count references, so fake it.
+  Quality = {};
+  Quality.merge(F);
+  EXPECT_FALSE(Quality.Deprecated); // FIXME: Include deprecated bit in index.
+  EXPECT_EQ(Quality.SemaCCPriority, SymbolQualitySignals().SemaCCPriority);
+  EXPECT_EQ(Quality.References, 24u);
+
+  Quality = {};
+  Quality.merge(CodeCompletionResult(&findDecl(AST, "f"), /*Priority=*/42));
+  EXPECT_TRUE(Quality.Deprecated);
+  EXPECT_EQ(Quality.SemaCCPriority, 42u);
+  EXPECT_EQ(Quality.References, SymbolQualitySignals().References);
+}
+
+TEST(QualityTests, SymbolRelevanceSignalExtraction) {
+  auto AST = TestTU::withHeaderCode(R"cpp(
+    [[deprecated]]
+    int f() { return 0; }
+  )cpp")
+                 .build();
+
+  SymbolRelevanceSignals Relevance;
+  Relevance.merge(CodeCompletionResult(&findDecl(AST, "f"), /*Priority=*/42,
+                                       nullptr, false, /*Accessible=*/false));
+  EXPECT_EQ(Relevance.NameMatch, SymbolRelevanceSignals().NameMatch);
+  EXPECT_TRUE(Relevance.Forbidden);
+}
+
+// Do the signals move the scores in the direction we expect?
+TEST(QualityTests, SymbolQualitySignalsSanity) {
+  SymbolQualitySignals Default;
+  EXPECT_EQ(Default.evaluate(), 1);
+
+  SymbolQualitySignals Deprecated;
+  Deprecated.Deprecated = true;
+  EXPECT_LT(Deprecated.evaluate(), Default.evaluate());
+
+  SymbolQualitySignals WithReferences, ManyReferences;
+  WithReferences.References = 10;
+  ManyReferences.References = 1000;
+  EXPECT_GT(WithReferences.evaluate(), Default.evaluate());
+  EXPECT_GT(ManyReferences.evaluate(), WithReferences.evaluate());
+
+  SymbolQualitySignals LowPriority, HighPriority;
+  LowPriority.SemaCCPriority = 60;
+  HighPriority.SemaCCPriority = 20;
+  EXPECT_GT(HighPriority.evaluate(), Default.evaluate());
+  EXPECT_LT(LowPriority.evaluate(), Default.evaluate());
+}
+
+TEST(QualityTests, SymbolRelevanceSignalsSanity) {
+  SymbolRelevanceSignals Default;
+  EXPECT_EQ(Default.evaluate(), 1);
+
+  SymbolRelevanceSignals Forbidden;
+  Forbidden.Forbidden = true;
+  EXPECT_LT(Forbidden.evaluate(), Default.evaluate());
+
+  SymbolRelevanceSignals PoorNameMatch;
+  PoorNameMatch.NameMatch = 0.2;
+  EXPECT_LT(PoorNameMatch.evaluate(), Default.evaluate());
+}
+
+TEST(QualityTests, SortText) {
+  EXPECT_LT(sortText(std::numeric_limits<float>::infinity()), sortText(1000.2));
+  EXPECT_LT(sortText(1000.2), sortText(1));
+  EXPECT_LT(sortText(1), sortText(0.3));
+  EXPECT_LT(sortText(0.3), sortText(0));
+  EXPECT_LT(sortText(0), sortText(-10));
+  EXPECT_LT(sortText(-10), sortText(-std::numeric_limits<float>::infinity()));
+
+  EXPECT_LT(sortText(1, "z"), sortText(0, "a"));
+  EXPECT_LT(sortText(0, "a"), sortText(0, "z"));
+}
+
+} // namespace
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/trunk/unittests/clangd/XRefsTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/XRefsTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/XRefsTests.cpp
@@ -12,15 +12,13 @@
 #include "Matchers.h"
 #include "SyncAPI.h"
 #include "TestFS.h"
+#include "TestTU.h"
 #include "XRefs.h"
-#include "gmock/gmock.h"
 #include "index/FileIndex.h"
 #include "index/SymbolCollector.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/PCHContainerOperations.h"
-#include "clang/Frontend/Utils.h"
 #include "clang/Index/IndexingAction.h"
 #include "llvm/Support/Path.h"
+#include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
 namespace clang {
@@ -39,34 +37,6 @@
                           std::vector<Diag> Diagnostics) override {}
 };
 
-// FIXME: this is duplicated with FileIndexTests. Share it.
-ParsedAST build(StringRef MainCode, StringRef HeaderCode = "") {
-  auto HeaderPath = testPath("foo.h");
-  auto MainPath = testPath("foo.cpp");
-  llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
-      new vfs::InMemoryFileSystem());
-  VFS->addFile(MainPath, 0, llvm::MemoryBuffer::getMemBuffer(MainCode));
-  VFS->addFile(HeaderPath, 0, llvm::MemoryBuffer::getMemBuffer(HeaderCode));
-  std::vector<const char *> Cmd = {"clang", "-xc++", MainPath.c_str()};
-  if (!HeaderCode.empty()) {
-    std::vector<const char *> args = {"-include", HeaderPath.c_str()};
-    Cmd.insert(Cmd.begin() + 1, args.begin(), args.end());
-  }
-  auto CI = createInvocationFromCommandLine(Cmd);
-
-  auto Buf = MemoryBuffer::getMemBuffer(MainCode);
-  auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf),
-                              std::make_shared<PCHContainerOperations>(), VFS);
-  assert(AST.hasValue());
-  return std::move(*AST);
-}
-
-std::unique_ptr<SymbolIndex> buildIndex(StringRef MainCode,
-                                        StringRef HeaderCode) {
-  auto AST = build(MainCode, HeaderCode);
-  return MemIndex::build(indexAST(&AST));
-}
-
 // Extracts ranges from an annotated example, and constructs a matcher for a
 // highlight set. Ranges should be named $read/$write as appropriate.
 Matcher<const std::vector<DocumentHighlight> &>
@@ -117,7 +87,7 @@
   };
   for (const char *Test : Tests) {
     Annotations T(Test);
-    auto AST = build(T.code());
+    auto AST = TestTU::withCode(T.code()).build();
     EXPECT_THAT(findDocumentHighlights(AST, T.point()), HighlightsFrom(T))
         << Test;
   }
@@ -139,10 +109,12 @@
       void  $f1[[f1]]() {}
     )cpp");
 
-  auto Index = buildIndex(SymbolCpp.code(), SymbolHeader.code());
+  TestTU TU;
+  TU.Code = SymbolCpp.code();
+  TU.HeaderCode = SymbolHeader.code();
+  auto Index = TU.index();
   auto runFindDefinitionsWithIndex = [&Index](const Annotations &Main) {
-    auto AST = build(/*MainCode=*/Main.code(),
-                     /*HeaderCode=*/"");
+    auto AST = TestTU::withCode(Main.code()).build();
     return clangd::findDefinitions(AST, Main.point(), Index.get());
   };
 
@@ -329,7 +301,7 @@
   };
   for (const char *Test : Tests) {
     Annotations T(Test);
-    auto AST = build(T.code());
+    auto AST = TestTU::withCode(T.code()).build();
     std::vector<Matcher<Location>> ExpectedLocations;
     for (const auto &R : T.ranges())
       ExpectedLocations.push_back(RangeIs(R));
@@ -661,7 +633,7 @@
 
   for (const OneTest &Test : Tests) {
     Annotations T(Test.Input);
-    auto AST = build(T.code());
+    auto AST = TestTU::withCode(T.code()).build();
     Hover H = getHover(AST, T.point());
 
     EXPECT_EQ(H.contents.value, Test.ExpectedHover) << Test.Input;
Index: clang-tools-extra/trunk/unittests/clangd/TestFS.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/TestFS.cpp
+++ clang-tools-extra/trunk/unittests/clangd/TestFS.cpp
@@ -20,8 +20,8 @@
       new vfs::InMemoryFileSystem);
   for (auto &FileAndContents : Files) {
     MemFS->addFile(FileAndContents.first(), time_t(),
-                   MemoryBuffer::getMemBuffer(FileAndContents.second,
-                                              FileAndContents.first()));
+                   MemoryBuffer::getMemBufferCopy(FileAndContents.second,
+                                                  FileAndContents.first()));
   }
   return MemFS;
 }
Index: clang-tools-extra/trunk/unittests/clangd/TestTU.h
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/TestTU.h
+++ clang-tools-extra/trunk/unittests/clangd/TestTU.h
@@ -0,0 +1,59 @@
+//===--- TestTU.h - Scratch source files for testing ------------*- C++-*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===---------------------------------------------------------------------===//
+//
+// Many tests for indexing, code completion etc are most naturally expressed
+// using code examples.
+// TestTU lets test define these examples in a common way without dealing with
+// the mechanics of VFS and compiler interactions, and then easily grab the
+// AST, particular symbols, etc.
+//
+//===---------------------------------------------------------------------===//
+#ifndef LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H
+#define LLVM_CLANG_TOOLS_EXTRA_UNITTESTS_CLANGD_TESTTU_H
+#include "ClangdUnit.h"
+#include "index/Index.h"
+#include "gtest/gtest.h"
+
+namespace clang {
+namespace clangd {
+
+struct TestTU {
+  static TestTU withCode(llvm::StringRef Code) {
+    TestTU TU;
+    TU.Code = Code;
+    return TU;
+  }
+
+  static TestTU withHeaderCode(llvm::StringRef HeaderCode) {
+    TestTU TU;
+    TU.HeaderCode = HeaderCode;
+    return TU;
+  }
+
+  // The code to be compiled.
+  std::string Code;
+  std::string Filename = "TestTU.cpp";
+
+  // Define contents of a header to be included by TestTU.cpp.
+  std::string HeaderCode;
+  std::string HeaderFilename = "TestTU.h";
+
+  ParsedAST build() const;
+  SymbolSlab headerSymbols() const;
+  std::unique_ptr<SymbolIndex> index() const;
+};
+
+// Look up an index symbol by qualified name, which must be unique.
+const Symbol &findSymbol(const SymbolSlab &, llvm::StringRef QName);
+// Look up an AST symbol by qualified name, which must be unique and top-level.
+const NamedDecl &findDecl(ParsedAST &AST, llvm::StringRef QName);
+
+} // namespace clangd
+} // namespace clang
+#endif
Index: clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
+++ clang-tools-extra/trunk/unittests/clangd/FileIndexTests.cpp
@@ -7,11 +7,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "TestFS.h"
+#include "TestTU.h"
 #include "index/FileIndex.h"
-#include "clang/Frontend/CompilerInvocation.h"
-#include "clang/Frontend/PCHContainerOperations.h"
-#include "clang/Frontend/Utils.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -83,38 +80,19 @@
   return Matches;
 }
 
-/// Create an ParsedAST for \p Code. Returns None if \p Code is empty.
-/// \p Code is put into <Path>.h which is included by \p <BasePath>.cpp.
-llvm::Optional<ParsedAST> build(llvm::StringRef BasePath,
-                                llvm::StringRef Code) {
-  if (Code.empty())
-    return llvm::None;
-
-  assert(llvm::sys::path::extension(BasePath).empty() &&
-         "BasePath must be a base file path without extension.");
-  llvm::IntrusiveRefCntPtr<vfs::InMemoryFileSystem> VFS(
-      new vfs::InMemoryFileSystem);
-  std::string Path = testPath((BasePath + ".cpp").str());
-  std::string Header = testPath((BasePath + ".h").str());
-  VFS->addFile(Path, 0, llvm::MemoryBuffer::getMemBuffer(""));
-  VFS->addFile(Header, 0, llvm::MemoryBuffer::getMemBuffer(Code));
-  const char *Args[] = {"clang", "-xc++", "-include", Header.c_str(),
-                        Path.c_str()};
-
-  auto CI = createInvocationFromCommandLine(Args);
-
-  auto Buf = llvm::MemoryBuffer::getMemBuffer(Code);
-  auto AST = ParsedAST::Build(std::move(CI), nullptr, std::move(Buf),
-                              std::make_shared<PCHContainerOperations>(), VFS);
-  assert(AST.hasValue());
-  return std::move(*AST);
+// Adds Basename.cpp, which includes Basename.h, which contains Code.
+void update(FileIndex &M, llvm::StringRef Basename, llvm::StringRef Code) {
+  TestTU File;
+  File.Filename = (Basename + ".cpp").str();
+  File.HeaderFilename = (Basename + ".h").str();
+  File.HeaderCode = Code;
+  auto AST = File.build();
+  M.update(File.Filename, &AST);
 }
 
 TEST(FileIndexTest, IndexAST) {
   FileIndex M;
-  M.update(
-      "f1",
-      build("f1", "namespace ns { void f() {} class X {}; }").getPointer());
+  update(M, "f1", "namespace ns { void f() {} class X {}; }");
 
   FuzzyFindRequest Req;
   Req.Query = "";
@@ -124,24 +102,17 @@
 
 TEST(FileIndexTest, NoLocal) {
   FileIndex M;
-  M.update(
-      "f1",
-      build("f1", "namespace ns { void f() { int local = 0; } class X {}; }")
-          .getPointer());
+  update(M, "f1", "namespace ns { void f() { int local = 0; } class X {}; }");
 
   FuzzyFindRequest Req;
   Req.Query = "";
   EXPECT_THAT(match(M, Req), UnorderedElementsAre("ns", "ns::f", "ns::X"));
 }
 
 TEST(FileIndexTest, IndexMultiASTAndDeduplicate) {
   FileIndex M;
-  M.update(
-      "f1",
-      build("f1", "namespace ns { void f() {} class X {}; }").getPointer());
-  M.update(
-      "f2",
-      build("f2", "namespace ns { void ff() {} class X {}; }").getPointer());
+  update(M, "f1", "namespace ns { void f() {} class X {}; }");
+  update(M, "f2", "namespace ns { void ff() {} class X {}; }");
 
   FuzzyFindRequest Req;
   Req.Query = "";
@@ -151,39 +122,35 @@
 
 TEST(FileIndexTest, RemoveAST) {
   FileIndex M;
-  M.update(
-      "f1",
-      build("f1", "namespace ns { void f() {} class X {}; }").getPointer());
+  update(M, "f1", "namespace ns { void f() {} class X {}; }");
 
   FuzzyFindRequest Req;
   Req.Query = "";
   Req.Scopes = {"ns::"};
   EXPECT_THAT(match(M, Req), UnorderedElementsAre("ns::f", "ns::X"));
 
-  M.update("f1", nullptr);
+  M.update("f1.cpp", nullptr);
   EXPECT_THAT(match(M, Req), UnorderedElementsAre());
 }
 
 TEST(FileIndexTest, RemoveNonExisting) {
   FileIndex M;
-  M.update("no", nullptr);
+  M.update("no.cpp", nullptr);
   EXPECT_THAT(match(M, FuzzyFindRequest()), UnorderedElementsAre());
 }
 
 TEST(FileIndexTest, IgnoreClassMembers) {
   FileIndex M;
-  M.update("f1",
-           build("f1", "class X { static int m1; int m2; static void f(); };")
-               .getPointer());
+  update(M, "f1", "class X { static int m1; int m2; static void f(); };");
 
   FuzzyFindRequest Req;
   Req.Query = "";
   EXPECT_THAT(match(M, Req), UnorderedElementsAre("X"));
 }
 
 TEST(FileIndexTest, NoIncludeCollected) {
   FileIndex M;
-  M.update("f", build("f", "class string {};").getPointer());
+  update(M, "f", "class string {};");
 
   FuzzyFindRequest Req;
   Req.Query = "";
@@ -206,7 +173,7 @@
 )cpp";
 
   FileIndex M;
-  M.update("f", build("f", Source).getPointer());
+  update(M, "f", Source);
 
   FuzzyFindRequest Req;
   Req.Query = "";
Index: clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
+++ clang-tools-extra/trunk/unittests/clangd/CMakeLists.txt
@@ -23,10 +23,12 @@
   HeadersTests.cpp
   IndexTests.cpp
   JSONExprTests.cpp
+  QualityTests.cpp
   SourceCodeTests.cpp
   SymbolCollectorTests.cpp
   SyncAPI.cpp
   TestFS.cpp
+  TestTU.cpp
   ThreadingTests.cpp
   TraceTests.cpp
   TUSchedulerTests.cpp
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to