ilya-biryukov created this revision.
ilya-biryukov added reviewers: ioeric, sammccall.
Herald added subscribers: kadircet, arphaman, jkorous, MaskRay.

Also add a hidden option to control whether the types are collected.
The option exists for experiments and will be removed once the expected
types implementation is stabilized.

The index size is almost unchanged, e.g. the YAML index for all clangd
sources increased from 53MB to 54MB.


Repository:
  rCTE Clang Tools Extra

https://reviews.llvm.org/D52274

Files:
  clangd/index/Index.cpp
  clangd/index/Index.h
  clangd/index/Serialization.cpp
  clangd/index/SymbolCollector.cpp
  clangd/index/SymbolCollector.h
  clangd/index/SymbolYAML.cpp
  clangd/indexer/IndexerMain.cpp

Index: clangd/indexer/IndexerMain.cpp
===================================================================
--- clangd/indexer/IndexerMain.cpp
+++ clangd/indexer/IndexerMain.cpp
@@ -67,6 +67,11 @@
                      clEnumValN(Binary, "binary", "binary RIFF format")),
     llvm::cl::init(YAML));
 
+static llvm::cl::opt<bool>
+    CollectTypes("collect-types",
+                 llvm::cl::desc("Collect type information during indexing"),
+                 llvm::cl::init(false), llvm::cl::Hidden);
+
 /// Responsible for aggregating symbols from each processed file and producing
 /// the final results. All methods in this class must be thread-safe,
 /// 'consumeSymbols' may be called from multiple threads.
@@ -142,6 +147,7 @@
     CollectorOpts.CollectIncludePath = true;
     CollectorOpts.CountReferences = true;
     CollectorOpts.Origin = SymbolOrigin::Static;
+    CollectorOpts.CollectTypes = CollectTypes;
     auto Includes = llvm::make_unique<CanonicalIncludes>();
     addSystemHeadersMapping(Includes.get());
     CollectorOpts.Includes = Includes.get();
Index: clangd/index/SymbolYAML.cpp
===================================================================
--- clangd/index/SymbolYAML.cpp
+++ clangd/index/SymbolYAML.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "SymbolYAML.h"
+#include "ExpectedTypes.h"
 #include "Index.h"
 #include "Serialization.h"
 #include "Trace.h"
@@ -21,10 +22,12 @@
 
 LLVM_YAML_IS_DOCUMENT_LIST_VECTOR(clang::clangd::Symbol)
 LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences)
+LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::SType);
 
 namespace llvm {
 namespace yaml {
 
+using clang::clangd::SType;
 using clang::clangd::Symbol;
 using clang::clangd::SymbolID;
 using clang::clangd::SymbolLocation;
@@ -94,6 +97,44 @@
   }
 };
 
+template <> struct ScalarTraits<SType> {
+  static void output(const SType &Value, void *, llvm::raw_ostream &Out) {
+    Out << Value.toHexStr();
+  }
+  static StringRef input(StringRef Scalar, void *, SType &Type) {
+    Type = SType::fromHexStr(Scalar);
+    return StringRef();
+  }
+
+  static QuotingType mustQuote(StringRef) { return QuotingType::None; }
+};
+
+// YamlIO normalizer for ArrayRef<T> fields. The YAML form is a std::vector<T>
+// (so traits for std::vector<T> must exist); on input the elements are copied
+// into the llvm::BumpPtrAllocator supplied as the IO context.
+template <typename T> struct ArenaArrayPtr {
+  ArenaArrayPtr(IO &) {}
+  ArenaArrayPtr(IO &, llvm::ArrayRef<T> D) {
+    Normalized.assign(D.begin(), D.end());
+  }
+
+  llvm::ArrayRef<T> denormalize(IO &IO) {
+    assert(IO.getContext() && "Expecting an arena (as context) to allocate "
+                              "data for read symbols.");
+    if (Normalized.empty())
+      return llvm::ArrayRef<T>();
+    // Placement-new each element into raw storage taken from the allocator.
+    auto *Allocator = static_cast<llvm::BumpPtrAllocator *>(IO.getContext());
+    T *Items = Allocator->Allocate<T>(Normalized.size());
+    for (size_t I = 0, Size = Normalized.size(); I < Size; ++I)
+      new (Items + I) T(Normalized[I]);
+    // The returned view points into allocator memory, not into Normalized.
+    return llvm::ArrayRef<T>(Items, Items + Normalized.size());
+  }
+
+  std::vector<T> Normalized;
+};
+
 template <> struct MappingTraits<Symbol> {
   static void mapping(IO &IO, Symbol &Sym) {
     MappingNormalization<NormalizedSymbolID, SymbolID> NSymbolID(IO, Sym.ID);
@@ -113,6 +154,9 @@
     IO.mapOptional("Documentation", Sym.Documentation);
     IO.mapOptional("ReturnType", Sym.ReturnType);
     IO.mapOptional("IncludeHeaders", Sym.IncludeHeaders);
+    MappingNormalization<ArenaArrayPtr<SType>, llvm::ArrayRef<SType>> NTypes(
+        IO, Sym.Types);
+    IO.mapOptional("Types", NTypes->Normalized);
   }
 };
 
@@ -169,7 +213,9 @@
 namespace clangd {
 
 SymbolSlab symbolsFromYAML(llvm::StringRef YAMLContent) {
-  llvm::yaml::Input Yin(YAMLContent);
+  // Store data of pointer fields (excl. StringRef) like `Types`.
+  llvm::BumpPtrAllocator Arena;
+  llvm::yaml::Input Yin(YAMLContent, &Arena);
   std::vector<Symbol> S;
   Yin >> S;
 
Index: clangd/index/SymbolCollector.h
===================================================================
--- clangd/index/SymbolCollector.h
+++ clangd/index/SymbolCollector.h
@@ -62,6 +62,8 @@
     /// collect macros. For example, `indexTopLevelDecls` will not index any
     /// macro even if this is true.
     bool CollectMacro = false;
+    /// Collect type information. Used to improve code completion ranking.
+    bool CollectTypes = true;
   };
 
   SymbolCollector(Options Opts);
Index: clangd/index/SymbolCollector.cpp
===================================================================
--- clangd/index/SymbolCollector.cpp
+++ clangd/index/SymbolCollector.cpp
@@ -531,6 +531,11 @@
   if (!Include.empty())
     S.IncludeHeaders.emplace_back(Include, 1);
 
+  llvm::SmallVector<SType, 2> Types;
+  if (Opts.CollectTypes && (S.Flags & Symbol::IndexedForCodeCompletion))
+    Types = SType::fromCompletionResult(*ASTCtx, SymbolCompletion);
+  S.Types = Types;
+
   S.Origin = Opts.Origin;
   if (ND.getAvailability() == AR_Deprecated)
     S.Flags |= Symbol::Deprecated;
Index: clangd/index/Serialization.cpp
===================================================================
--- clangd/index/Serialization.cpp
+++ clangd/index/Serialization.cpp
@@ -360,7 +360,8 @@
   std::vector<Symbol> Symbols;
   for (const auto &Sym : *Data.Symbols) {
     Symbols.emplace_back(Sym);
-    visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
+    visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); },
+                 [&](ArrayRef<SType> &T) { /* FIXME(ibiryukov): own types */ });
   }
 
   std::string StringSection;
Index: clangd/index/Index.h
===================================================================
--- clangd/index/Index.h
+++ clangd/index/Index.h
@@ -10,12 +10,14 @@
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_INDEX_H
 
+#include "ExpectedTypes.h"
 #include "clang/Index/IndexSymbol.h"
 #include "clang/Lex/Lexer.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/Optional.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringExtras.h"
 #include "llvm/ADT/StringRef.h"
@@ -252,6 +254,10 @@
     Deprecated = 1 << 1,
   };
 
+  /// Types that the symbol converts to, including its own type. Can be used
+  /// for scoring when the expected type is known.
+  llvm::ArrayRef<SType> Types;
+
   SymbolFlag Flags = SymbolFlag::None;
   /// FIXME: also add deprecation message and fixit?
 };
@@ -267,7 +273,9 @@
 
 // Invokes Callback with each StringRef& contained in the Symbol.
 // Useful for deduplicating backing strings.
-template <typename Callback> void visitStrings(Symbol &S, const Callback &CB) {
+inline void
+visitStrings(Symbol &S, llvm::function_ref<void(llvm::StringRef &)> CB,
+             llvm::function_ref<void(llvm::ArrayRef<SType> &)> TypesCB) {
   CB(S.Name);
   CB(S.Scope);
   CB(S.CanonicalDeclaration.FileURI);
@@ -278,6 +286,7 @@
   CB(S.ReturnType);
   for (auto &Include : S.IncludeHeaders)
     CB(Include.IncludeHeader);
+  TypesCB(S.Types);
 }
 
 // Computes query-independent quality score for a Symbol.
Index: clangd/index/Index.cpp
===================================================================
--- clangd/index/Index.cpp
+++ clangd/index/Index.cpp
@@ -89,18 +89,24 @@
 }
 
 // Copy the underlying data of the symbol into the owned arena.
-static void own(Symbol &S, llvm::UniqueStringSaver &Strings) {
-  visitStrings(S, [&](StringRef &V) { V = Strings.save(V); });
+static void own(Symbol &S, llvm::UniqueStringSaver &Strings,
+                llvm::BumpPtrAllocator &Arena) {
+  visitStrings(S, [&](StringRef &V) { V = Strings.save(V); },
+               [&](ArrayRef<SType> &T) {
+                 SType *NewItems = Arena.Allocate<SType>(T.size());
+                 llvm::copy(T, NewItems);
+                 T = makeArrayRef(NewItems, T.size());
+               });
 }
 
 void SymbolSlab::Builder::insert(const Symbol &S) {
   auto R = SymbolIndex.try_emplace(S.ID, Symbols.size());
   if (R.second) {
     Symbols.push_back(S);
-    own(Symbols.back(), UniqueStrings);
+    own(Symbols.back(), UniqueStrings, Arena);
   } else {
     auto &Copy = Symbols[R.first->second] = S;
-    own(Copy, UniqueStrings);
+    own(Copy, UniqueStrings, Arena);
   }
 }
 
@@ -113,7 +119,7 @@
   BumpPtrAllocator NewArena;
   llvm::UniqueStringSaver Strings(NewArena);
   for (auto &S : Symbols)
-    own(S, Strings);
+    own(S, Strings, NewArena);
   return SymbolSlab(std::move(NewArena), std::move(Symbols));
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
  • [PATCH] D52274: [clangd] Col... Ilya Biryukov via Phabricator via cfe-commits

Reply via email to