kbobyrev created this revision.
kbobyrev added reviewers: ioeric, ilya-biryukov, sammccall.
kbobyrev added a project: clang-tools-extra.
Herald added subscribers: kadircet, arphaman, jkorous, MaskRay, mgorny.

This patch introduces `PostingList` interface which is helpful for experiments 
with Sparse and Dense (will be introduced in the next few patches) posting list 
representation.

No functionality change is introduced, this patch is mostly refactoring so that 
the following patches could focus on functionality while not being too hard to 
review.


https://reviews.llvm.org/D51982

Files:
  clang-tools-extra/clangd/CMakeLists.txt
  clang-tools-extra/clangd/index/dex/Dex.cpp
  clang-tools-extra/clangd/index/dex/Dex.h
  clang-tools-extra/clangd/index/dex/Iterator.cpp
  clang-tools-extra/clangd/index/dex/Iterator.h
  clang-tools-extra/clangd/index/dex/PostingList.cpp
  clang-tools-extra/clangd/index/dex/PostingList.h
  clang-tools-extra/unittests/clangd/DexTests.cpp

Index: clang-tools-extra/unittests/clangd/DexTests.cpp
===================================================================
--- clang-tools-extra/unittests/clangd/DexTests.cpp
+++ clang-tools-extra/unittests/clangd/DexTests.cpp
@@ -47,8 +47,8 @@
 }
 
 TEST(DexIterators, DocumentIterator) {
-  const PostingList L = {4, 7, 8, 20, 42, 100};
-  auto DocIterator = create(L);
+  const SparsePostingList L({4, 7, 8, 20, 42, 100});
+  auto DocIterator = L.iterator();
 
   EXPECT_EQ(DocIterator->peek(), 4U);
   EXPECT_FALSE(DocIterator->reachedEnd());
@@ -70,28 +70,28 @@
 }
 
 TEST(DexIterators, AndWithEmpty) {
-  const PostingList L0;
-  const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
+  const SparsePostingList L0({});
+  const SparsePostingList L1({0, 5, 7, 10, 42, 320, 9000});
 
-  auto AndEmpty = createAnd(create(L0));
+  auto AndEmpty = createAnd(L0.iterator());
   EXPECT_TRUE(AndEmpty->reachedEnd());
 
-  auto AndWithEmpty = createAnd(create(L0), create(L1));
+  auto AndWithEmpty = createAnd(L0.iterator(), L1.iterator());
   EXPECT_TRUE(AndWithEmpty->reachedEnd());
 
   EXPECT_THAT(consumeIDs(*AndWithEmpty), ElementsAre());
 }
 
 TEST(DexIterators, AndTwoLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+  const SparsePostingList L0({0, 5, 7, 10, 42, 320, 9000});
+  const SparsePostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
 
-  auto And = createAnd(create(L1), create(L0));
+  auto And = createAnd(L1.iterator(), L0.iterator());
 
   EXPECT_FALSE(And->reachedEnd());
   EXPECT_THAT(consumeIDs(*And), ElementsAre(0U, 7U, 10U, 320U, 9000U));
 
-  And = createAnd(create(L0), create(L1));
+  And = createAnd(L0.iterator(), L1.iterator());
 
   And->advanceTo(0);
   EXPECT_EQ(And->peek(), 0U);
@@ -107,11 +107,11 @@
 }
 
 TEST(DexIterators, AndThreeLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
-  const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
+  const SparsePostingList L0({0, 5, 7, 10, 42, 320, 9000});
+  const SparsePostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
+  const SparsePostingList L2({1, 4, 7, 11, 30, 60, 320, 9000});
 
-  auto And = createAnd(create(L0), create(L1), create(L2));
+  auto And = createAnd(L0.iterator(), L1.iterator(), L2.iterator());
   EXPECT_EQ(And->peek(), 7U);
   And->advanceTo(300);
   EXPECT_EQ(And->peek(), 320U);
@@ -121,24 +121,24 @@
 }
 
 TEST(DexIterators, OrWithEmpty) {
-  const PostingList L0;
-  const PostingList L1 = {0, 5, 7, 10, 42, 320, 9000};
+  const SparsePostingList L0({});
+  const SparsePostingList L1({0, 5, 7, 10, 42, 320, 9000});
 
-  auto OrEmpty = createOr(create(L0));
+  auto OrEmpty = createOr(L0.iterator());
   EXPECT_TRUE(OrEmpty->reachedEnd());
 
-  auto OrWithEmpty = createOr(create(L0), create(L1));
+  auto OrWithEmpty = createOr(L0.iterator(), L1.iterator());
   EXPECT_FALSE(OrWithEmpty->reachedEnd());
 
   EXPECT_THAT(consumeIDs(*OrWithEmpty),
               ElementsAre(0U, 5U, 7U, 10U, 42U, 320U, 9000U));
 }
 
 TEST(DexIterators, OrTwoLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
+  const SparsePostingList L0({0, 5, 7, 10, 42, 320, 9000});
+  const SparsePostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
 
-  auto Or = createOr(create(L0), create(L1));
+  auto Or = createOr(L0.iterator(), L1.iterator());
 
   EXPECT_FALSE(Or->reachedEnd());
   EXPECT_EQ(Or->peek(), 0U);
@@ -161,18 +161,18 @@
   Or->advanceTo(9001);
   EXPECT_TRUE(Or->reachedEnd());
 
-  Or = createOr(create(L0), create(L1));
+  Or = createOr(L0.iterator(), L1.iterator());
 
   EXPECT_THAT(consumeIDs(*Or),
               ElementsAre(0U, 4U, 5U, 7U, 10U, 30U, 42U, 60U, 320U, 9000U));
 }
 
 TEST(DexIterators, OrThreeLists) {
-  const PostingList L0 = {0, 5, 7, 10, 42, 320, 9000};
-  const PostingList L1 = {0, 4, 7, 10, 30, 60, 320, 9000};
-  const PostingList L2 = {1, 4, 7, 11, 30, 60, 320, 9000};
+  const SparsePostingList L0({0, 5, 7, 10, 42, 320, 9000});
+  const SparsePostingList L1({0, 4, 7, 10, 30, 60, 320, 9000});
+  const SparsePostingList L2({1, 4, 7, 11, 30, 60, 320, 9000});
 
-  auto Or = createOr(create(L0), create(L1), create(L2));
+  auto Or = createOr(L0.iterator(), L1.iterator(), L2.iterator());
 
   EXPECT_FALSE(Or->reachedEnd());
   EXPECT_EQ(Or->peek(), 0U);
@@ -221,19 +221,19 @@
   //                  |1, 5, 7, 9|                      |1, 5|    |0, 3, 5|
   //                  +----------+                      +----+    +-------+
   //
-  const PostingList L0 = {1, 3, 5, 8, 9};
-  const PostingList L1 = {1, 5, 7, 9};
-  const PostingList L3;
-  const PostingList L4 = {1, 5};
-  const PostingList L5 = {0, 3, 5};
+  const SparsePostingList L0({1, 3, 5, 8, 9});
+  const SparsePostingList L1({1, 5, 7, 9});
+  const SparsePostingList L3({});
+  const SparsePostingList L4({1, 5});
+  const SparsePostingList L5({0, 3, 5});
 
   // Root of the query tree: [1, 5]
   auto Root = createAnd(
       // Lower And Iterator: [1, 5, 9]
-      createAnd(create(L0), createBoost(create(L1), 2U)),
+      createAnd(L0.iterator(), createBoost(L1.iterator(), 2U)),
       // Lower Or Iterator: [0, 1, 5]
-      createOr(create(L3), createBoost(create(L4), 3U),
-               createBoost(create(L5), 4U)));
+      createOr(L3.iterator(), createBoost(L4.iterator(), 3U),
+               createBoost(L5.iterator(), 4U)));
 
   EXPECT_FALSE(Root->reachedEnd());
   EXPECT_EQ(Root->peek(), 1U);
@@ -255,52 +255,51 @@
 }
 
 TEST(DexIterators, StringRepresentation) {
-  const PostingList L0 = {4, 7, 8, 20, 42, 100};
-  const PostingList L1 = {1, 3, 5, 8, 9};
-  const PostingList L2 = {1, 5, 7, 9};
-  const PostingList L3 = {0, 5};
-  const PostingList L4 = {0, 1, 5};
-  const PostingList L5;
+  const SparsePostingList L0({4, 7, 8, 20, 42, 100});
+  const SparsePostingList L1({1, 3, 5, 8, 9});
+  const SparsePostingList L2({1, 5, 7, 9});
+  const SparsePostingList L3({0, 5});
+  const SparsePostingList L4({0, 1, 5});
+  const SparsePostingList L5({});
 
-  EXPECT_EQ(llvm::to_string(*(create(L0))), "[{4}, 7, 8, 20, 42, 100, END]");
+  EXPECT_EQ(llvm::to_string(*(L0.iterator())), "[4]");
 
-  auto Nested = createAnd(createAnd(create(L1), create(L2)),
-                          createOr(create(L3), create(L4), create(L5)));
+  auto Nested =
+      createAnd(createAnd(L1.iterator(), L2.iterator()),
+                createOr(L3.iterator(), L4.iterator(), L5.iterator()));
 
-  EXPECT_EQ(llvm::to_string(*Nested),
-            "(& (| [0, {5}, END] [0, {1}, 5, END] [{END}]) (& [{1}, 5, 7, 9, "
-            "END] [{1}, 3, 5, 8, 9, END]))");
+  EXPECT_EQ(llvm::to_string(*Nested), "(& (| [5] [1] [END]) (& [1] [1]))");
 }
 
 TEST(DexIterators, Limit) {
-  const PostingList L0 = {3, 6, 7, 20, 42, 100};
-  const PostingList L1 = {1, 3, 5, 6, 7, 30, 100};
-  const PostingList L2 = {0, 3, 5, 7, 8, 100};
+  const SparsePostingList L0({3, 6, 7, 20, 42, 100});
+  const SparsePostingList L1({1, 3, 5, 6, 7, 30, 100});
+  const SparsePostingList L2({0, 3, 5, 7, 8, 100});
 
-  auto DocIterator = createLimit(create(L0), 42);
+  auto DocIterator = createLimit(L0.iterator(), 42);
   EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7, 20, 42, 100));
 
-  DocIterator = createLimit(create(L0), 3);
+  DocIterator = createLimit(L0.iterator(), 3);
   EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre(3, 6, 7));
 
-  DocIterator = createLimit(create(L0), 0);
+  DocIterator = createLimit(L0.iterator(), 0);
   EXPECT_THAT(consumeIDs(*DocIterator), ElementsAre());
 
-  auto AndIterator =
-      createAnd(createLimit(createTrue(9000), 343), createLimit(create(L0), 2),
-                createLimit(create(L1), 3), createLimit(create(L2), 42));
+  auto AndIterator = createAnd(
+      createLimit(createTrue(9000), 343), createLimit(L0.iterator(), 2),
+      createLimit(L1.iterator(), 3), createLimit(L2.iterator(), 42));
   EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(3, 7));
 }
 
 TEST(DexIterators, True) {
   auto TrueIterator = createTrue(0U);
   EXPECT_TRUE(TrueIterator->reachedEnd());
   EXPECT_THAT(consumeIDs(*TrueIterator), ElementsAre());
 
-  PostingList L0 = {1, 2, 5, 7};
+  const SparsePostingList L0({1, 2, 5, 7});
   TrueIterator = createTrue(7U);
   EXPECT_THAT(TrueIterator->peek(), 0);
-  auto AndIterator = createAnd(create(L0), move(TrueIterator));
+  auto AndIterator = createAnd(L0.iterator(), move(TrueIterator));
   EXPECT_FALSE(AndIterator->reachedEnd());
   EXPECT_THAT(consumeIDs(*AndIterator), ElementsAre(1, 2, 5));
 }
@@ -311,10 +310,10 @@
   auto ElementBoost = BoostIterator->consume();
   EXPECT_THAT(ElementBoost, 42U);
 
-  const PostingList L0 = {2, 4};
-  const PostingList L1 = {1, 4};
-  auto Root = createOr(createTrue(5U), createBoost(create(L0), 2U),
-                       createBoost(create(L1), 3U));
+  const SparsePostingList L0({2, 4});
+  const SparsePostingList L1({1, 4});
+  auto Root = createOr(createTrue(5U), createBoost(L0.iterator(), 2U),
+                       createBoost(L1.iterator(), 3U));
 
   ElementBoost = Root->consume();
   EXPECT_THAT(ElementBoost, Iterator::DEFAULT_BOOST_SCORE);
@@ -453,10 +452,10 @@
 }
 
 TEST(Dex, FuzzyFind) {
-  auto Index = Dex::build(
-      generateSymbols({"ns::ABC", "ns::BCD", "::ABC", "ns::nested::ABC",
-                       "other::ABC", "other::A"}),
-      URISchemes);
+  auto Index =
+      Dex::build(generateSymbols({"ns::ABC", "ns::BCD", "::ABC",
+                                  "ns::nested::ABC", "other::ABC", "other::A"}),
+                 URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "ABC";
   Req.Scopes = {"ns::"};
@@ -494,7 +493,7 @@
 // should handle deduplication instead.
 TEST(DexTest, DexDeduplicate) {
   std::vector<Symbol> Symbols = {symbol("1"), symbol("2"), symbol("3"),
-                                 symbol("2") /* duplicate */};
+                                 symbol("2")};
   FuzzyFindRequest Req;
   Req.Query = "2";
   Dex I(Symbols, URISchemes);
@@ -524,16 +523,14 @@
 }
 
 TEST(DexTest, MatchQualifiedNamesWithoutSpecificScope) {
-  auto I =
-      Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
+  auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   EXPECT_THAT(match(*I, Req), UnorderedElementsAre("a::y1", "b::y2", "y3"));
 }
 
 TEST(DexTest, MatchQualifiedNamesWithGlobalScope) {
-  auto I =
-      Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
+  auto I = Dex::build(generateSymbols({"a::y1", "b::y2", "y3"}), URISchemes);
   FuzzyFindRequest Req;
   Req.Query = "y";
   Req.Scopes = {""};
Index: clang-tools-extra/clangd/index/dex/PostingList.h
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/PostingList.h
@@ -0,0 +1,57 @@
+//===--- PostingList.h - FIXME(kbobyrev): Description -----------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// FIXME(kbobryev): Description.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include <cstdint>
+#include <vector>
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+/// Symbol position in the list of all index symbols sorted by a pre-computed
+/// symbol quality.
+using DocID = uint32_t;
+
+class Iterator;
+
+// FIXME(kbobyrev): Description.
+// FIXME(kbobyrev): Implement dense posting list implementation using VByte
+// encoding.
+class PostingList {
+public:
+  virtual std::unique_ptr<Iterator> iterator() const = 0;
+
+  virtual ~PostingList() {}
+};
+
+// FIXME(kbobyrev): Description: rationale, complexity.
+class SparsePostingList : public PostingList {
+public:
+  explicit SparsePostingList(const std::vector<DocID> &&Documents)
+      : Documents(std::move(Documents)) {}
+
+  virtual std::unique_ptr<Iterator> iterator() const override;
+
+private:
+  const std::vector<DocID> Documents;
+};
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_POSTINGLIST_H
Index: clang-tools-extra/clangd/index/dex/PostingList.cpp
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/index/dex/PostingList.cpp
@@ -0,0 +1,84 @@
+//===--- PostingList.cpp - FIXME(kbobyrev): Description ---------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "PostingList.h"
+#include "Iterator.h"
+
+namespace clang {
+namespace clangd {
+namespace dex {
+
+namespace {
+
+/// Implements Iterator over sparse PostingList. This is the most basic
+/// iterator: it doesn't have any children (hence it is the leaf of iterator
+/// tree) and is simply a wrapper around std::vector<DocID>::const_iterator.
+class SparsePostingListIterator : public Iterator {
+public:
+  explicit SparsePostingListIterator(llvm::ArrayRef<DocID> Documents)
+      : Documents(Documents), Index(std::begin(Documents)) {}
+
+  bool reachedEnd() const override { return Index == std::end(Documents); }
+
+  /// Advances cursor to the next item.
+  void advance() override {
+    assert(!reachedEnd() &&
+           "Sparse Posting List iterator can't advance() at the end.");
+    ++Index;
+  }
+
+  /// Applies binary search to advance cursor to the next item with DocID equal
+  /// or higher than the given one.
+  void advanceTo(DocID ID) override {
+    assert(!reachedEnd() &&
+           "Sparse Posting List iterator can't advance() at the end.");
+    // If current ID is beyond requested one, iterator is already in the right
+    // state.
+    if (peek() < ID)
+      Index = std::lower_bound(Index, std::end(Documents), ID);
+  }
+
+  DocID peek() const override {
+    assert(!reachedEnd() &&
+           "Sparse Posting List iterator can't peek() at the end.");
+    return *Index;
+  }
+
+  float consume() override {
+    assert(!reachedEnd() &&
+           "Sparse Posting List iterator can't consume() at the end.");
+    return DEFAULT_BOOST_SCORE;
+  }
+
+  size_t estimateSize() const override { return Documents.size(); }
+
+private:
+  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
+    OS << '[';
+    if (Index != std::end(Documents))
+      OS << *Index;
+    else
+      OS << "END";
+    OS << ']';
+    return OS;
+  }
+
+  llvm::ArrayRef<DocID> Documents;
+  llvm::ArrayRef<DocID>::const_iterator Index;
+};
+
+} // namespace
+
+std::unique_ptr<Iterator> SparsePostingList::iterator() const {
+  return llvm::make_unique<SparsePostingListIterator>(Documents);
+}
+
+} // namespace dex
+} // namespace clangd
+} // namespace clang
Index: clang-tools-extra/clangd/index/dex/Iterator.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.h
+++ clang-tools-extra/clangd/index/dex/Iterator.h
@@ -36,29 +36,12 @@
 #include <algorithm>
 #include <memory>
 #include <vector>
+#include "PostingList.h"
 
 namespace clang {
 namespace clangd {
 namespace dex {
 
-/// Symbol position in the list of all index symbols sorted by a pre-computed
-/// symbol quality.
-using DocID = uint32_t;
-/// Contains sorted sequence of DocIDs all of which belong to symbols matching
-/// certain criteria, i.e. containing a Search Token. PostingLists are values
-/// for the inverted index.
-// FIXME(kbobyrev): Posting lists for incomplete trigrams (one/two symbols) are
-// likely to be very dense and hence require special attention so that the index
-// doesn't use too much memory. Possible solution would be to construct
-// compressed posting lists which consist of ranges of DocIDs instead of
-// distinct DocIDs. A special case would be the empty query: for that case
-// TrueIterator should be implemented - an iterator which doesn't actually store
-// any PostingList within itself, but "contains" all DocIDs in range
-// [0, IndexSize).
-using PostingList = std::vector<DocID>;
-/// Immutable reference to PostingList object.
-using PostingListRef = llvm::ArrayRef<DocID>;
-
 /// Iterator is the interface for Query Tree node. The simplest type of Iterator
 /// is DocumentIterator which is simply a wrapper around PostingList iterator
 /// and serves as the Query Tree leaf. More sophisticated examples of iterators
@@ -131,11 +114,6 @@
 /// to acquire preliminary scores of requested items.
 std::vector<std::pair<DocID, float>> consume(Iterator &It);
 
-/// Returns a document iterator over given PostingList.
-///
-/// DocumentIterator returns DEFAULT_BOOST_SCORE for each processed item.
-std::unique_ptr<Iterator> create(PostingListRef Documents);
-
 /// Returns AND Iterator which performs the intersection of the PostingLists of
 /// its children.
 ///
Index: clang-tools-extra/clangd/index/dex/Iterator.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Iterator.cpp
+++ clang-tools-extra/clangd/index/dex/Iterator.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "Iterator.h"
+#include "PostingList.h"
 #include <algorithm>
 #include <cassert>
 #include <numeric>
@@ -18,69 +19,6 @@
 
 namespace {
 
-/// Implements Iterator over a PostingList. DocumentIterator is the most basic
-/// iterator: it doesn't have any children (hence it is the leaf of iterator
-/// tree) and is simply a wrapper around PostingList::const_iterator.
-class DocumentIterator : public Iterator {
-public:
-  explicit DocumentIterator(PostingListRef Documents)
-      : Documents(Documents), Index(std::begin(Documents)) {}
-
-  bool reachedEnd() const override { return Index == std::end(Documents); }
-
-  /// Advances cursor to the next item.
-  void advance() override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end.");
-    ++Index;
-  }
-
-  /// Applies binary search to advance cursor to the next item with DocID equal
-  /// or higher than the given one.
-  void advanceTo(DocID ID) override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't advance() at the end.");
-    // If current ID is beyond requested one, iterator is already in the right
-    // state.
-    if (peek() < ID)
-      Index = std::lower_bound(Index, std::end(Documents), ID);
-  }
-
-  DocID peek() const override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't peek() at the end.");
-    return *Index;
-  }
-
-  float consume() override {
-    assert(!reachedEnd() && "DOCUMENT iterator can't consume() at the end.");
-    return DEFAULT_BOOST_SCORE;
-  }
-
-  size_t estimateSize() const override { return Documents.size(); }
-
-private:
-  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
-    OS << '[';
-    auto Separator = "";
-    for (auto It = std::begin(Documents); It != std::end(Documents); ++It) {
-      OS << Separator;
-      if (It == Index)
-        OS << '{' << *It << '}';
-      else
-        OS << *It;
-      Separator = ", ";
-    }
-    OS << Separator;
-    if (Index == std::end(Documents))
-      OS << "{END}";
-    else
-      OS << "END";
-    OS << ']';
-    return OS;
-  }
-
-  PostingListRef Documents;
-  PostingListRef::const_iterator Index;
-};
-
 /// Implements Iterator over the intersection of other iterators.
 ///
 /// AndIterator iterates through common items among all children. It becomes
@@ -399,10 +337,6 @@
   return Result;
 }
 
-std::unique_ptr<Iterator> create(PostingListRef Documents) {
-  return llvm::make_unique<DocumentIterator>(Documents);
-}
-
 std::unique_ptr<Iterator>
 createAnd(std::vector<std::unique_ptr<Iterator>> Children) {
   return llvm::make_unique<AndIterator>(move(Children));
Index: clang-tools-extra/clangd/index/dex/Dex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.h
+++ clang-tools-extra/clangd/index/dex/Dex.h
@@ -21,6 +21,7 @@
 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
 
 #include "Iterator.h"
+#include "PostingList.h"
 #include "Token.h"
 #include "Trigram.h"
 #include "index/Index.h"
@@ -98,7 +99,7 @@
   /// posting list would contain all indices of symbols defined in namespace
   /// std. Inverted index is used to retrieve posting lists which are processed
   /// during the fuzzyFind process.
-  llvm::DenseMap<Token, PostingList> InvertedIndex;
+  llvm::DenseMap<Token, std::unique_ptr<PostingList>> InvertedIndex;
   std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
   // Size of memory retained by KeepAlive.
   size_t BackingDataSize = 0;
Index: clang-tools-extra/clangd/index/dex/Dex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.cpp
+++ clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -46,7 +46,7 @@
 std::vector<std::unique_ptr<Iterator>> createFileProximityIterators(
     llvm::ArrayRef<std::string> ProximityPaths,
     llvm::ArrayRef<std::string> URISchemes,
-    const llvm::DenseMap<Token, PostingList> &InvertedIndex) {
+    const llvm::DenseMap<Token, std::unique_ptr<PostingList>> &InvertedIndex) {
   std::vector<std::unique_ptr<Iterator>> BoostingIterators;
   // Deduplicate parent URIs extracted from the ProximityPaths.
   llvm::StringSet<> ParentURIs;
@@ -82,7 +82,7 @@
       // FIXME(kbobyrev): Append LIMIT on top of every BOOST iterator.
       PathProximitySignals.SymbolURI = ParentURI;
       BoostingIterators.push_back(
-          createBoost(create(It->second), PathProximitySignals.evaluate()));
+          createBoost(It->second->iterator(), PathProximitySignals.evaluate()));
     }
   }
   return BoostingIterators;
@@ -113,12 +113,18 @@
   }
 
   // Populate TempInvertedIndex with posting lists for index symbols.
+  llvm::DenseMap<Token, std::vector<DocID>> TempInvertedIndex;
   for (DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
     const auto *Sym = Symbols[SymbolRank];
     for (const auto &Token : generateSearchTokens(*Sym))
-      InvertedIndex[Token].push_back(SymbolRank);
+      TempInvertedIndex[Token].push_back(SymbolRank);
   }
 
+  for (const auto &TokenToPostingList : TempInvertedIndex)
+    InvertedIndex.insert(
+        {TokenToPostingList.first, llvm::make_unique<SparsePostingList>(
+                                       move(TokenToPostingList.second))});
+
   vlog("Built Dex with estimated memory usage {0} bytes.",
        estimateMemoryUsage());
 }
@@ -142,7 +148,7 @@
   for (const auto &Trigram : TrigramTokens) {
     const auto It = InvertedIndex.find(Trigram);
     if (It != InvertedIndex.end())
-      TrigramIterators.push_back(create(It->second));
+      TrigramIterators.push_back(It->second->iterator());
   }
   if (!TrigramIterators.empty())
     TopLevelChildren.push_back(createAnd(move(TrigramIterators)));
@@ -152,7 +158,7 @@
   for (const auto &Scope : Req.Scopes) {
     const auto It = InvertedIndex.find(Token(Token::Kind::Scope, Scope));
     if (It != InvertedIndex.end())
-      ScopeIterators.push_back(create(It->second));
+      ScopeIterators.push_back(It->second->iterator());
   }
   // Add OR iterator for scopes if there are any Scope Iterators.
   if (!ScopeIterators.empty())
@@ -231,8 +237,10 @@
   Bytes += SymbolQuality.size() * sizeof(float);
   Bytes += LookupTable.getMemorySize();
   Bytes += InvertedIndex.getMemorySize();
+  /*
   for (const auto &P : InvertedIndex)
     Bytes += P.second.size() * sizeof(DocID);
+  */
   return Bytes + BackingDataSize;
 }
 
Index: clang-tools-extra/clangd/CMakeLists.txt
===================================================================
--- clang-tools-extra/clangd/CMakeLists.txt
+++ clang-tools-extra/clangd/CMakeLists.txt
@@ -48,6 +48,7 @@
 
   index/dex/Dex.cpp
   index/dex/Iterator.cpp
+  index/dex/PostingList.cpp
   index/dex/Trigram.cpp
 
   LINK_LIBS
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to