[PATCH] D88745: [clangd][WIP] Add new code completion signals to improve MRR by 3%.

2020-10-05 Thread Utkarsh Saxena via Phabricator via cfe-commits
usaxena95 updated this revision to Diff 296159.
usaxena95 edited the summary of this revision.
usaxena95 added a comment.

Updated the references to old signals.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D88745/new/

https://reviews.llvm.org/D88745

Files:
  clang-tools-extra/clangd/Quality.cpp
  clang-tools-extra/clangd/Quality.h
  
clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json

Index: clang-tools-extra/clangd/quality/model/features.json
===
--- clang-tools-extra/clangd/quality/model/features.json
+++ clang-tools-extra/clangd/quality/model/features.json
@@ -20,7 +20,7 @@
 "kind": "NUMBER"
 },
 {
-"name": "IsNameInContext",
+"name": "NumNameInContext",
 "kind": "NUMBER"
 },
 {
@@ -63,6 +63,10 @@
 "name": "TypeMatchesPreferred",
 "kind": "NUMBER"
 },
+{
+"name": "SemaPriority",
+"kind": "NUMBER"
+},
 {
 "name": "SymbolCategory",
 "kind": "ENUM",
@@ -81,4 +85,4 @@
 "type": "clang::clangd::SymbolRelevanceSignals::AccessibleScope",
 "header": "Quality.h"
 }
-]
\ No newline at end of file
+]
Index: clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp
===
--- clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp
+++ clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp
@@ -39,7 +39,7 @@
 E.setNumReferences(RandInt(1)); // Can be large integer.
 E.setSymbolCategory(RandInt(10));   // 10 Symbol Category.
 
-E.setIsNameInContext(FlipCoin(0.5)); // Boolean.
+E.setNumNameInContext(RandInt(5));   // Boolean.
 E.setIsForbidden(FlipCoin(0.1)); // Boolean.
 E.setIsInBaseClass(FlipCoin(0.3));   // Boolean.
 E.setFileProximityDistance(
@@ -57,6 +57,7 @@
 E.setHadSymbolType(FlipCoin(0.6));// Boolean.
 E.setTypeMatchesPreferred(FlipCoin(0.5)); // Boolean.
 E.setFilterLength(RandInt(15));
+E.setSemaPriority(RandInt(100));
 Examples.push_back(E);
   }
   return Examples;
Index: clang-tools-extra/clangd/Quality.h
===
--- clang-tools-extra/clangd/Quality.h
+++ clang-tools-extra/clangd/Quality.h
@@ -140,11 +140,14 @@
   /// CompletionPrefix.
   unsigned FilterLength = 0;
 
+  /// Priority of the completion item as computed by Sema.
+  unsigned SemaPriority = 0;
+
   /// Set of derived signals computed by calculateDerivedSignals(). Must not be
   /// set explicitly.
   struct DerivedSignals {
-/// Whether Name contains some word from context.
-bool NameMatchesContext = false;
+/// Number of words in Context that matches Name.
+unsigned NumNameInContext = 0;
 /// Min distance between SymbolURI and all the headers included by the TU.
 unsigned FileProximityDistance = FileDistance::Unreachable;
 /// Min distance between SymbolScope and all the available scopes.
Index: clang-tools-extra/clangd/Quality.cpp
===
--- clang-tools-extra/clangd/Quality.cpp
+++ clang-tools-extra/clangd/Quality.cpp
@@ -300,6 +300,8 @@
   SemaCCResult.Availability == CXAvailability_NotAccessible)
 Forbidden = true;
 
+  SemaPriority = SemaCCResult.Priority;
+
   if (SemaCCResult.Declaration) {
 SemaSaysInScope = true;
 // We boost things that have decls in the main file. We give a fixed score
@@ -352,7 +354,12 @@
 SymbolRelevanceSignals::DerivedSignals
 SymbolRelevanceSignals::calculateDerivedSignals() const {
   DerivedSignals Derived;
-  Derived.NameMatchesContext = wordMatching(Name, ContextWords).hasValue();
+  int NumNameInContext = 0;
+  if (ContextWords)
+for (const auto &Word : ContextWords->keys())
+  if (Name.contains_lower(Word))
+NumNameInContext++;
+  Derived.NumNameInContext = NumNameInContext;
   Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty()
   ? FileDistance::Unreachable
   : FileProximityMatch->distance(SymbolURI);
@@ -387,7 +394,7 @@
  ? 2.0
  : scopeProximityScore(Derived.ScopeProximityDistance);
 
-  if (Derived.NameMatchesContext)
+  if (Derived.NumNameInContext > 0)
 Score *= 1.5;
 
   // Symbols like local variables may only be referenced within their scope.
@@ -498,7 +505,7 @@
 
   SymbolRelevanceSignals::DerivedSignals Derived =
   Relevance.calculateDerivedSignals();
-  E.setIsNameInContext(Derived.NameMatchesContext);
+  E.setNumNameInContext(Derived.NumNameInContext);
   E.setIsForbidden(Relevance.Forbidden);
  

[PATCH] D88745: [clangd][WIP] Add new code completion signals to improve MRR by 3%.

2020-10-02 Thread Utkarsh Saxena via Phabricator via cfe-commits
usaxena95 created this revision.
Herald added subscribers: cfe-commits, kadircet, arphaman.
Herald added a project: clang.
usaxena95 requested review of this revision.
Herald added subscribers: MaskRay, ilya-biryukov.

Adds two more signals.

- NumNameInContext: Strength of match of name with context.
- SemaPriority: Priority of a sema completion as given by sema.

This will potentially increase the MRR further by 3%.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D88745

Files:
  clang-tools-extra/clangd/Quality.cpp
  clang-tools-extra/clangd/Quality.h
  clang-tools-extra/clangd/quality/model/features.json
  clang-tools-extra/clangd/quality/model/forest.json


Index: clang-tools-extra/clangd/quality/model/features.json
===
--- clang-tools-extra/clangd/quality/model/features.json
+++ clang-tools-extra/clangd/quality/model/features.json
@@ -23,6 +23,10 @@
 "name": "IsNameInContext",
 "kind": "NUMBER"
 },
+{
+"name": "NumNameInContext",
+"kind": "NUMBER"
+},
 {
 "name": "IsForbidden",
 "kind": "NUMBER"
@@ -63,6 +67,10 @@
 "name": "TypeMatchesPreferred",
 "kind": "NUMBER"
 },
+{
+"name": "SemaPriority",
+"kind": "NUMBER"
+},
 {
 "name": "SymbolCategory",
 "kind": "ENUM",
Index: clang-tools-extra/clangd/Quality.h
===
--- clang-tools-extra/clangd/Quality.h
+++ clang-tools-extra/clangd/Quality.h
@@ -140,6 +140,9 @@
   /// CompletionPrefix.
   unsigned FilterLength = 0;
 
+  /// Priority of the completion item as computed by Sema.
+  unsigned SemaPriority = 0;
+
   /// Set of derived signals computed by calculateDerivedSignals(). Must not be
   /// set explicitly.
   struct DerivedSignals {
Index: clang-tools-extra/clangd/Quality.cpp
===
--- clang-tools-extra/clangd/Quality.cpp
+++ clang-tools-extra/clangd/Quality.cpp
@@ -300,6 +300,8 @@
   SemaCCResult.Availability == CXAvailability_NotAccessible)
 Forbidden = true;
 
+  SemaPriority = SemaCCResult.Priority;
+
   if (SemaCCResult.Declaration) {
 SemaSaysInScope = true;
 // We boost things that have decls in the main file. We give a fixed score
@@ -499,6 +501,8 @@
   SymbolRelevanceSignals::DerivedSignals Derived =
   Relevance.calculateDerivedSignals();
   E.setIsNameInContext(Derived.NameMatchesContext);
+  // FIXME: Use number of matches of name in context here.
+  E.setNumNameInContext(Derived.NameMatchesContext);
   E.setIsForbidden(Relevance.Forbidden);
   E.setIsInBaseClass(Relevance.InBaseClass);
   E.setFileProximityDistance(Derived.FileProximityDistance);
@@ -512,6 +516,7 @@
   E.setHadSymbolType(Relevance.HadSymbolType);
   E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred);
   E.setFilterLength(Relevance.FilterLength);
+  E.setSemaPriority(Relevance.SemaPriority);
   return Evaluate(E);
 }
 


Index: clang-tools-extra/clangd/quality/model/features.json
===
--- clang-tools-extra/clangd/quality/model/features.json
+++ clang-tools-extra/clangd/quality/model/features.json
@@ -23,6 +23,10 @@
 "name": "IsNameInContext",
 "kind": "NUMBER"
 },
+{
+"name": "NumNameInContext",
+"kind": "NUMBER"
+},
 {
 "name": "IsForbidden",
 "kind": "NUMBER"
@@ -63,6 +67,10 @@
 "name": "TypeMatchesPreferred",
 "kind": "NUMBER"
 },
+{
+"name": "SemaPriority",
+"kind": "NUMBER"
+},
 {
 "name": "SymbolCategory",
 "kind": "ENUM",
Index: clang-tools-extra/clangd/Quality.h
===
--- clang-tools-extra/clangd/Quality.h
+++ clang-tools-extra/clangd/Quality.h
@@ -140,6 +140,9 @@
   /// CompletionPrefix.
   unsigned FilterLength = 0;
 
+  /// Priority of the completion item as computed by Sema.
+  unsigned SemaPriority = 0;
+
   /// Set of derived signals computed by calculateDerivedSignals(). Must not be
   /// set explicitly.
   struct DerivedSignals {
Index: clang-tools-extra/clangd/Quality.cpp
===
--- clang-tools-extra/clangd/Quality.cpp
+++ clang-tools-extra/clangd/Quality.cpp
@@ -300,6 +300,8 @@
   SemaCCResult.Availability == CXAvailability_NotAccessible)
 Forbidden = true;
 
+  SemaPriority = SemaCCResult.Priority;
+
   if (SemaCCResult.Declaration) {
 SemaSaysInScope = true;
 // We boost things that have decls in the main file. We give a fixed score
@@ -499,6 +501,8 @@
   SymbolRelevanceSignals::DerivedSignals Derived =
   Relevance.calculateDerivedSignals();
   E.setIsNameInContext(Derived.NameMatchesContext);
+  // FIXME: Use number of matches of name in context here.
+  E.setNumNameInC