[PATCH] D88745: [clangd][WIP] Add new code completion signals to improve MRR by 3%.
usaxena95 updated this revision to Diff 296159. usaxena95 edited the summary of this revision. usaxena95 added a comment. Updated the references to old signals. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D88745/new/ https://reviews.llvm.org/D88745 Files: clang-tools-extra/clangd/Quality.cpp clang-tools-extra/clangd/Quality.h clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp clang-tools-extra/clangd/quality/model/features.json clang-tools-extra/clangd/quality/model/forest.json Index: clang-tools-extra/clangd/quality/model/features.json === --- clang-tools-extra/clangd/quality/model/features.json +++ clang-tools-extra/clangd/quality/model/features.json @@ -20,7 +20,7 @@ "kind": "NUMBER" }, { -"name": "IsNameInContext", +"name": "NumNameInContext", "kind": "NUMBER" }, { @@ -63,6 +63,10 @@ "name": "TypeMatchesPreferred", "kind": "NUMBER" }, +{ +"name": "SemaPriority", +"kind": "NUMBER" +}, { "name": "SymbolCategory", "kind": "ENUM", @@ -81,4 +85,4 @@ "type": "clang::clangd::SymbolRelevanceSignals::AccessibleScope", "header": "Quality.h" } -] \ No newline at end of file +] Index: clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp === --- clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp +++ clang-tools-extra/clangd/benchmarks/CompletionModel/DecisionForestBenchmark.cpp @@ -39,7 +39,7 @@ E.setNumReferences(RandInt(1)); // Can be large integer. E.setSymbolCategory(RandInt(10)); // 10 Symbol Category. -E.setIsNameInContext(FlipCoin(0.5)); // Boolean. +E.setNumNameInContext(RandInt(5)); // Boolean. E.setIsForbidden(FlipCoin(0.1)); // Boolean. E.setIsInBaseClass(FlipCoin(0.3)); // Boolean. E.setFileProximityDistance( @@ -57,6 +57,7 @@ E.setHadSymbolType(FlipCoin(0.6));// Boolean. E.setTypeMatchesPreferred(FlipCoin(0.5)); // Boolean. E.setFilterLength(RandInt(15)); +E.setSemaPriority(RandInt(100)); Examples.push_back(E); } return Examples; Index: clang-tools-extra/clangd/Quality.h === --- clang-tools-extra/clangd/Quality.h +++ clang-tools-extra/clangd/Quality.h @@ -140,11 +140,14 @@ /// CompletionPrefix. unsigned FilterLength = 0; + /// Priority of the completion item as computed by Sema. + unsigned SemaPriority = 0; + /// Set of derived signals computed by calculateDerivedSignals(). Must not be /// set explicitly. struct DerivedSignals { -/// Whether Name contains some word from context. -bool NameMatchesContext = false; +/// Number of words in Context that matches Name. +unsigned NumNameInContext = 0; /// Min distance between SymbolURI and all the headers included by the TU. unsigned FileProximityDistance = FileDistance::Unreachable; /// Min distance between SymbolScope and all the available scopes. Index: clang-tools-extra/clangd/Quality.cpp === --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -300,6 +300,8 @@ SemaCCResult.Availability == CXAvailability_NotAccessible) Forbidden = true; + SemaPriority = SemaCCResult.Priority; + if (SemaCCResult.Declaration) { SemaSaysInScope = true; // We boost things that have decls in the main file. We give a fixed score @@ -352,7 +354,12 @@ SymbolRelevanceSignals::DerivedSignals SymbolRelevanceSignals::calculateDerivedSignals() const { DerivedSignals Derived; - Derived.NameMatchesContext = wordMatching(Name, ContextWords).hasValue(); + int NumNameInContext = 0; + if (ContextWords) +for (const auto &Word : ContextWords->keys()) + if (Name.contains_lower(Word)) +NumNameInContext++; + Derived.NumNameInContext = NumNameInContext; Derived.FileProximityDistance = !FileProximityMatch || SymbolURI.empty() ? FileDistance::Unreachable : FileProximityMatch->distance(SymbolURI); @@ -387,7 +394,7 @@ ? 2.0 : scopeProximityScore(Derived.ScopeProximityDistance); - if (Derived.NameMatchesContext) + if (Derived.NumNameInContext > 0) Score *= 1.5; // Symbols like local variables may only be referenced within their scope. @@ -498,7 +505,7 @@ SymbolRelevanceSignals::DerivedSignals Derived = Relevance.calculateDerivedSignals(); - E.setIsNameInContext(Derived.NameMatchesContext); + E.setNumNameInContext(Derived.NumNameInContext); E.setIsForbidden(Relevance.Forbidden);
[PATCH] D88745: [clangd][WIP] Add new code completion signals to improve MRR by 3%.
usaxena95 created this revision. Herald added subscribers: cfe-commits, kadircet, arphaman. Herald added a project: clang. usaxena95 requested review of this revision. Herald added subscribers: MaskRay, ilya-biryukov. Adds two more signals. - NumNameInContext: Strength of match of name with context. - SemaPriority: Priority of a sema completion as given by sema. This will potentially increase the MRR further by 3%. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D88745 Files: clang-tools-extra/clangd/Quality.cpp clang-tools-extra/clangd/Quality.h clang-tools-extra/clangd/quality/model/features.json clang-tools-extra/clangd/quality/model/forest.json Index: clang-tools-extra/clangd/quality/model/features.json === --- clang-tools-extra/clangd/quality/model/features.json +++ clang-tools-extra/clangd/quality/model/features.json @@ -23,6 +23,10 @@ "name": "IsNameInContext", "kind": "NUMBER" }, +{ +"name": "NumNameInContext", +"kind": "NUMBER" +}, { "name": "IsForbidden", "kind": "NUMBER" @@ -63,6 +67,10 @@ "name": "TypeMatchesPreferred", "kind": "NUMBER" }, +{ +"name": "SemaPriority", +"kind": "NUMBER" +}, { "name": "SymbolCategory", "kind": "ENUM", Index: clang-tools-extra/clangd/Quality.h === --- clang-tools-extra/clangd/Quality.h +++ clang-tools-extra/clangd/Quality.h @@ -140,6 +140,9 @@ /// CompletionPrefix. unsigned FilterLength = 0; + /// Priority of the completion item as computed by Sema. + unsigned SemaPriority = 0; + /// Set of derived signals computed by calculateDerivedSignals(). Must not be /// set explicitly. struct DerivedSignals { Index: clang-tools-extra/clangd/Quality.cpp === --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -300,6 +300,8 @@ SemaCCResult.Availability == CXAvailability_NotAccessible) Forbidden = true; + SemaPriority = SemaCCResult.Priority; + if (SemaCCResult.Declaration) { SemaSaysInScope = true; // We boost things that have decls in the main file. We give a fixed score @@ -499,6 +501,8 @@ SymbolRelevanceSignals::DerivedSignals Derived = Relevance.calculateDerivedSignals(); E.setIsNameInContext(Derived.NameMatchesContext); + // FIXME: Use number of matches of name in context here. + E.setNumNameInContext(Derived.NameMatchesContext); E.setIsForbidden(Relevance.Forbidden); E.setIsInBaseClass(Relevance.InBaseClass); E.setFileProximityDistance(Derived.FileProximityDistance); @@ -512,6 +516,7 @@ E.setHadSymbolType(Relevance.HadSymbolType); E.setTypeMatchesPreferred(Relevance.TypeMatchesPreferred); E.setFilterLength(Relevance.FilterLength); + E.setSemaPriority(Relevance.SemaPriority); return Evaluate(E); } Index: clang-tools-extra/clangd/quality/model/features.json === --- clang-tools-extra/clangd/quality/model/features.json +++ clang-tools-extra/clangd/quality/model/features.json @@ -23,6 +23,10 @@ "name": "IsNameInContext", "kind": "NUMBER" }, +{ +"name": "NumNameInContext", +"kind": "NUMBER" +}, { "name": "IsForbidden", "kind": "NUMBER" @@ -63,6 +67,10 @@ "name": "TypeMatchesPreferred", "kind": "NUMBER" }, +{ +"name": "SemaPriority", +"kind": "NUMBER" +}, { "name": "SymbolCategory", "kind": "ENUM", Index: clang-tools-extra/clangd/Quality.h === --- clang-tools-extra/clangd/Quality.h +++ clang-tools-extra/clangd/Quality.h @@ -140,6 +140,9 @@ /// CompletionPrefix. unsigned FilterLength = 0; + /// Priority of the completion item as computed by Sema. + unsigned SemaPriority = 0; + /// Set of derived signals computed by calculateDerivedSignals(). Must not be /// set explicitly. struct DerivedSignals { Index: clang-tools-extra/clangd/Quality.cpp === --- clang-tools-extra/clangd/Quality.cpp +++ clang-tools-extra/clangd/Quality.cpp @@ -300,6 +300,8 @@ SemaCCResult.Availability == CXAvailability_NotAccessible) Forbidden = true; + SemaPriority = SemaCCResult.Priority; + if (SemaCCResult.Declaration) { SemaSaysInScope = true; // We boost things that have decls in the main file. We give a fixed score @@ -499,6 +501,8 @@ SymbolRelevanceSignals::DerivedSignals Derived = Relevance.calculateDerivedSignals(); E.setIsNameInContext(Derived.NameMatchesContext); + // FIXME: Use number of matches of name in context here. + E.setNumNameInC