kbobyrev updated this revision to Diff 406401.
kbobyrev added a comment.

Move include-mapping generators to clang and re-generate the files.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119130/new/

https://reviews.llvm.org/D119130

Files:
  clang-tools-extra/clangd/CSymbolMap.inc
  clang-tools-extra/clangd/Headers.cpp
  clang-tools-extra/clangd/Headers.h
  clang-tools-extra/clangd/StdSymbolMap.inc
  clang-tools-extra/clangd/include-mapping/cppreference_parser.py
  clang-tools-extra/clangd/include-mapping/gen_std.py
  clang-tools-extra/clangd/include-mapping/test.py
  clang-tools-extra/clangd/index/CanonicalIncludes.cpp
  clang-tools-extra/clangd/unittests/HeadersTests.cpp
  clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
  clang/include/clang/Tooling/Inclusions/StandardLibrary.h
  clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
  clang/lib/Tooling/Inclusions/CMakeLists.txt
  clang/lib/Tooling/Inclusions/StandardLibrary.cpp
  clang/tools/include-mapping/cppreference_parser.py
  clang/tools/include-mapping/gen_std.py
  clang/tools/include-mapping/test.py
  clang/unittests/Tooling/CMakeLists.txt
  clang/unittests/Tooling/StandardLibraryTest.cpp

Index: clang/unittests/Tooling/StandardLibraryTest.cpp
===================================================================
--- /dev/null
+++ clang/unittests/Tooling/StandardLibraryTest.cpp
@@ -0,0 +1,39 @@
+//===- unittests/Tooling/StandardLibraryTest.cpp --------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/Support/ScopedPrinter.h"
+
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::ElementsAre;
+
+namespace clang {
+namespace tooling {
+namespace {
+
+TEST(StdlibTest, All) {
+  auto VectorH = stdlib::Header::named("<vector>");
+  EXPECT_TRUE(VectorH);
+  EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
+  EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
+
+  auto Vector = stdlib::Symbol::named("std::", "vector");
+  EXPECT_TRUE(Vector);
+  EXPECT_EQ(llvm::to_string(*Vector), "std::vector");
+  EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
+  EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
+
+  EXPECT_EQ(Vector->header(), *VectorH);
+  EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
+}
+
+} // namespace
+} // namespace tooling
+} // namespace clang
Index: clang/unittests/Tooling/CMakeLists.txt
===================================================================
--- clang/unittests/Tooling/CMakeLists.txt
+++ clang/unittests/Tooling/CMakeLists.txt
@@ -17,6 +17,7 @@
   ExecutionTest.cpp
   FixItTest.cpp
   HeaderIncludesTest.cpp
+  StandardLibraryTest.cpp
   LexicallyOrderedRecursiveASTVisitorTest.cpp
   LookupTest.cpp
   QualTypeNamesTest.cpp
Index: clang-tools-extra/clangd/include-mapping/test.py
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/include-mapping/test.py
@@ -1,155 +0,0 @@
-#!/usr/bin/env python
-#===- test.py -  ---------------------------------------------*- python -*--===#
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-#===------------------------------------------------------------------------===#
-
-from cppreference_parser import _ParseSymbolPage, _ParseIndexPage
-
-import unittest
-
-class TestStdGen(unittest.TestCase):
-
-  def testParseIndexPage(self):
-    html = """
- <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
- <a href="complex/abs.html" title="abs"><tt>abs&lt;&gt;()</tt></a> (std::complex) <br>
- <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>
- <a href="acosh.html" title="acosh"><tt>acosh()</tt></a> <span class="t-mark-rev">(since C++11)</span> <br>
- <a href="as_bytes.html" title="as bytes"><tt>as_bytes&lt;&gt;()</tt></a> <span class="t-mark-rev t-since-cxx20">(since C++20)</span> <br>
- """
-
-    actual = _ParseIndexPage(html)
-    expected = [
-      ("abs", "abs.html", True),
-      ("abs", "complex/abs.html", True),
-      ("acos", "acos.html", False),
-      ("acosh", "acosh.html", False),
-      ("as_bytes", "as_bytes.html", False),
-    ]
-    self.assertEqual(len(actual), len(expected))
-    for i in range(0, len(actual)):
-      self.assertEqual(expected[i][0], actual[i][0])
-      self.assertTrue(actual[i][1].endswith(expected[i][1]))
-      self.assertEqual(expected[i][2], actual[i][2])
-
-
-  def testParseSymbolPage_SingleHeader(self):
-    # Defined in header <cmath>
-    html = """
- <table class="t-dcl-begin"><tbody>
-  <tr class="t-dsc-header">
-  <td> <div>Defined in header <code><a href="cmath.html" title="cmath">&lt;cmath&gt;</a></code>
-   </div></td>
-  <td></td>
-  <td></td>
-  </tr>
-  <tr class="t-dcl">
-    <td>void foo()</td>
-    <td>this is matched</td>
-  </tr>
-</tbody></table>
-"""
-    self.assertEqual(_ParseSymbolPage(html, 'foo'), set(['<cmath>']))
-
-
-  def testParseSymbolPage_MulHeaders(self):
-    #  Defined in header <cstddef>
-    #  Defined in header <cstdio>
-    #  Defined in header <cstdlib>
-    html = """
-<table class="t-dcl-begin"><tbody>
-  <tr class="t-dsc-header">
-    <td> <div>Defined in header <code><a href="cstddef.html" title="cstddef">&lt;cstddef&gt;</a></code>
-     </div></td>
-     <td></td>
-    <td></td>
-  </tr>
-  <tr class="t-dcl">
-    <td>void bar()</td>
-    <td>this mentions foo, but isn't matched</td>
-  </tr>
-  <tr class="t-dsc-header">
-    <td> <div>Defined in header <code><a href="cstdio.html" title="cstdio">&lt;cstdio&gt;</a></code>
-     </div></td>
-    <td></td>
-    <td></td>
-  </tr>
-  <tr class="t-dsc-header">
-    <td> <div>Defined in header <code><a href=".cstdlib.html" title="ccstdlib">&lt;cstdlib&gt;</a></code>
-     </div></td>
-    <td></td>
-    <td></td>
-  </tr>
-  <tr class="t-dcl">
-    <td>
-      <span>void</span>
-      foo
-      <span>()</span>
-    </td>
-    <td>this is matched</td>
-  </tr>
-</tbody></table>
-"""
-    self.assertEqual(_ParseSymbolPage(html, "foo"),
-                     set(['<cstdio>', '<cstdlib>']))
-
-
-  def testParseSymbolPage_MulHeadersInSameDiv(self):
-    # Multile <code> blocks in a Div.
-    # Defined in header <algorithm>
-    # Defined in header <utility>
-    html = """
-<table class="t-dcl-begin"><tbody>
-<tr class="t-dsc-header">
-<td><div>
-     Defined in header <code><a href="../header/algorithm.html" title="cpp/header/algorithm">&lt;algorithm&gt;</a></code><br>
-     Defined in header <code><a href="../header/utility.html" title="cpp/header/utility">&lt;utility&gt;</a></code>
-</div></td>
-<td></td>
-</tr>
-<tr class="t-dcl">
-  <td>
-    <span>void</span>
-    foo
-    <span>()</span>
-  </td>
-  <td>this is matched</td>
-</tr>
-</tbody></table>
-"""
-    self.assertEqual(_ParseSymbolPage(html, "foo"),
-                     set(['<algorithm>', '<utility>']))
-
-  def testParseSymbolPage_MulSymbolsInSameTd(self):
-    # defined in header <cstdint>
-    #   int8_t
-    #   int16_t
-    html = """
-<table class="t-dcl-begin"><tbody>
-<tr class="t-dsc-header">
-<td><div>
-     Defined in header <code><a href="cstdint.html" title="cstdint">&lt;cstdint&gt;</a></code><br>
-</div></td>
-<td></td>
-</tr>
-<tr class="t-dcl">
-  <td>
-    <span>int8_t</span>
-    <span>int16_t</span>
-  </td>
-  <td>this is matched</td>
-</tr>
-</tbody></table>
-"""
-    self.assertEqual(_ParseSymbolPage(html, "int8_t"),
-                     set(['<cstdint>']))
-    self.assertEqual(_ParseSymbolPage(html, "int16_t"),
-                     set(['<cstdint>']))
-
-
-if __name__ == '__main__':
-  unittest.main()
Index: clang/tools/include-mapping/gen_std.py
===================================================================
--- clang/tools/include-mapping/gen_std.py
+++ clang/tools/include-mapping/gen_std.py
@@ -11,6 +11,8 @@
 include headers) for C/C++ Standard Library symbols by parsing archieved HTML
 files from cppreference.
 
+The generated files live in clang/include/clang/Tooling/Inclusions/
+
 Caveats and FIXMEs:
   - only symbols directly in "std" namespace are added, we should also add std's
     subnamespace symbols (e.g. chrono).
@@ -44,7 +46,8 @@
 // Used to build a lookup table (qualified names => include headers) for %s
 // Standard Library symbols.
 //
-// Automatically generated file, DO NOT EDIT!
+// This file was generated automatically by
+// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
 //
 // Generated from cppreference offline HTML book (modified on %s).
 //===----------------------------------------------------------------------===//
Index: clang-tools-extra/clangd/include-mapping/cppreference_parser.py
===================================================================
--- /dev/null
+++ clang-tools-extra/clangd/include-mapping/cppreference_parser.py
@@ -1,184 +0,0 @@
-#!/usr/bin/env python
-#===- cppreference_parser.py -  ------------------------------*- python -*--===#
-#
-# Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-# See https://llvm.org/LICENSE.txt for license information.
-# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-#
-#===------------------------------------------------------------------------===#
-
-from bs4 import BeautifulSoup, NavigableString
-
-import collections
-import multiprocessing
-import os
-import re
-import signal
-import sys
-
-
-class Symbol:
-
-  def __init__(self, name, namespace, headers):
-    # unqualifed symbol name, e.g. "move"
-    self.name = name
-    # namespace of the symbol (with trailing "::"), e.g. "std::", "" (global scope)
-    # None for C symbols.
-    self.namespace = namespace
-    # a list of corresponding headers
-    self.headers = headers
-
-
-def _HasClass(tag, *classes):
-  for c in tag.get('class', []):
-    if c in classes:
-      return True
-  return False
-
-
-def _ParseSymbolPage(symbol_page_html, symbol_name):
-  """Parse symbol page and retrieve the include header defined in this page.
-  The symbol page provides header for the symbol, specifically in
-  "Defined in header <header>" section. An example:
-
-  <tr class="t-dsc-header">
-    <td colspan="2"> <div>Defined in header <code>&lt;ratio&gt;</code> </div>
-  </td></tr>
-
-  Returns a list of headers.
-  """
-  headers = set()
-  all_headers = set()
-
-  soup = BeautifulSoup(symbol_page_html, "html.parser")
-  # Rows in table are like:
-  #   Defined in header <foo>      .t-dsc-header
-  #   Defined in header <bar>      .t-dsc-header
-  #   decl1                        .t-dcl
-  #   Defined in header <baz>      .t-dsc-header
-  #   decl2                        .t-dcl
-  for table in soup.select('table.t-dcl-begin, table.t-dsc-begin'):
-    current_headers = []
-    was_decl = False
-    for row in table.select('tr'):
-      if _HasClass(row, 't-dcl', 't-dsc'):
-        was_decl = True
-        # Symbols are in the first cell.
-        found_symbols = row.find('td').stripped_strings
-        if not symbol_name in found_symbols:
-          continue
-        headers.update(current_headers)
-      elif _HasClass(row, 't-dsc-header'):
-        # If we saw a decl since the last header, this is a new block of headers
-        # for a new block of decls.
-        if was_decl:
-          current_headers = []
-        was_decl = False
-        # There are also .t-dsc-header for "defined in namespace".
-        if not "Defined in header " in row.text:
-          continue
-        # The interesting header content (e.g. <cstdlib>) is wrapped in <code>.
-        for header_code in row.find_all("code"):
-          current_headers.append(header_code.text)
-          all_headers.add(header_code.text)
-  # If the symbol was never named, consider all named headers.
-  return headers or all_headers
-
-
-def _ParseIndexPage(index_page_html):
-  """Parse index page.
-  The index page lists all std symbols and hrefs to their detailed pages
-  (which contain the defined header). An example:
-
-  <a href="abs.html" title="abs"><tt>abs()</tt></a> (int) <br>
-  <a href="acos.html" title="acos"><tt>acos()</tt></a> <br>
-
-  Returns a list of tuple (symbol_name, relative_path_to_symbol_page, variant).
-  """
-  symbols = []
-  soup = BeautifulSoup(index_page_html, "html.parser")
-  for symbol_href in soup.select("a[title]"):
-    # Ignore annotated symbols like "acos<>() (std::complex)".
-    # These tend to be overloads, and we the primary is more useful.
-    # This accidentally accepts begin/end despite the (iterator) caption: the
-    # (since C++11) note is first. They are good symbols, so the bug is unfixed.
-    caption = symbol_href.next_sibling
-    variant = None
-    if isinstance(caption, NavigableString) and "(" in caption:
-      variant = caption.text.strip(" ()")
-    symbol_tt = symbol_href.find("tt")
-    if symbol_tt:
-      symbols.append((symbol_tt.text.rstrip("<>()"), # strip any trailing <>()
-                      symbol_href["href"], variant))
-  return symbols
-
-
-def _ReadSymbolPage(path, name):
-  with open(path) as f:
-    return _ParseSymbolPage(f.read(), name)
-
-
-def _GetSymbols(pool, root_dir, index_page_name, namespace, variants_to_accept):
-  """Get all symbols listed in the index page. All symbols should be in the
-  given namespace.
-
-  Returns a list of Symbols.
-  """
-
-  # Workflow steps:
-  #   1. Parse index page which lists all symbols to get symbol
-  #      name (unqualified name) and its href link to the symbol page which
-  #      contains the defined header.
-  #   2. Parse the symbol page to get the defined header.
-  index_page_path = os.path.join(root_dir, index_page_name)
-  with open(index_page_path, "r") as f:
-    # Read each symbol page in parallel.
-    results = [] # (symbol_name, promise of [header...])
-    for symbol_name, symbol_page_path, variant in _ParseIndexPage(f.read()):
-      # Variant symbols (e.g. the std::locale version of isalpha) add ambiguity.
-      # FIXME: use these as a fallback rather than ignoring entirely.
-      variants_for_symbol = variants_to_accept.get(
-          (namespace or "") + symbol_name, ())
-      if variant and variant not in variants_for_symbol:
-        continue
-      path = os.path.join(root_dir, symbol_page_path)
-      results.append((symbol_name,
-                      pool.apply_async(_ReadSymbolPage, (path, symbol_name))))
-
-    # Build map from symbol name to a set of headers.
-    symbol_headers = collections.defaultdict(set)
-    for symbol_name, lazy_headers in results:
-      symbol_headers[symbol_name].update(lazy_headers.get())
-
-  symbols = []
-  for name, headers in sorted(symbol_headers.items(), key=lambda t : t[0]):
-    symbols.append(Symbol(name, namespace, list(headers)))
-  return symbols
-
-
-def GetSymbols(parse_pages):
-  """Get all symbols by parsing the given pages.
-
-  Args:
-    parse_pages: a list of tuples (page_root_dir, index_page_name, namespace)
-  """
-  # By default we prefer the non-variant versions, as they're more common. But
-  # there are some symbols, whose variant is more common. This list describes
-  # those symbols.
-  variants_to_accept = {
-      # std::remove<> has variant algorithm.
-      "std::remove": ("algorithm"),
-  }
-  symbols = []
-  # Run many workers to process individual symbol pages under the symbol index.
-  # Don't allow workers to capture Ctrl-C.
-  pool = multiprocessing.Pool(
-      initializer=lambda: signal.signal(signal.SIGINT, signal.SIG_IGN))
-  try:
-    for root_dir, page_name, namespace in parse_pages:
-      symbols.extend(_GetSymbols(pool, root_dir, page_name, namespace,
-                                 variants_to_accept))
-  finally:
-    pool.terminate()
-    pool.join()
-  return symbols
Index: clang/lib/Tooling/Inclusions/StandardLibrary.cpp
===================================================================
--- /dev/null
+++ clang/lib/Tooling/Inclusions/StandardLibrary.cpp
@@ -0,0 +1,154 @@
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+
+namespace clang {
+namespace stdlib {
+
+static llvm::StringRef *HeaderNames;
+static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
+static unsigned *SymbolHeaderIDs;
+static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
+// Maps symbol name -> Symbol::ID, within a namespace.
+using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
+
+static int initialize() {
+  unsigned SymCount = 0;
+#define SYMBOL(Name, NS, Header) ++SymCount;
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
+  SymbolHeaderIDs =
+      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
+  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
+  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
+
+  auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
+    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
+    if (R.second)
+      R.first->second = new NSSymbolMap();
+    return *R.first->second;
+  };
+
+  auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
+    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
+  };
+
+  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
+                              llvm::StringRef HeaderName) mutable {
+    if (NS == "None")
+      NS = "";
+
+    SymbolNames[SymIndex] = {NS, Name};
+    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
+
+    NSSymbolMap &NSSymbols = AddNS(NS);
+    NSSymbols.try_emplace(Name, SymIndex);
+
+    ++SymIndex;
+  };
+#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
+#undef SYMBOL
+
+  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
+  for (const auto &E : *HeaderIDs)
+    HeaderNames[E.second] = E.first;
+
+  return 0;
+}
+
+static void ensureInitialized() {
+  static int Dummy = initialize();
+  (void)Dummy;
+}
+
+llvm::Optional<Header> Header::named(llvm::StringRef Name) {
+  ensureInitialized();
+  auto It = HeaderIDs->find(Name);
+  if (It == HeaderIDs->end())
+    return llvm::None;
+  return Header(It->second);
+}
+llvm::StringRef Header::name() const { return HeaderNames[ID]; }
+llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
+llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
+llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
+                                     llvm::StringRef Name) {
+  ensureInitialized();
+  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
+    auto It = NSSymbols->find(Name);
+    if (It != NSSymbols->end())
+      return Symbol(It->second);
+  }
+  return llvm::None;
+}
+Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
+llvm::SmallVector<Header> Symbol::headers() const {
+  return {header()}; // FIXME: multiple in case of ambiguity
+}
+
+Recognizer::Recognizer() { ensureInitialized(); }
+
+NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
+  auto It = NamespaceCache.find(D);
+  if (It != NamespaceCache.end())
+    return It->second;
+
+  NSSymbolMap *Result = [&]() -> NSSymbolMap * {
+    if (!D) // Nullptr means the global namespace
+      return NamespaceSymbols->lookup("");
+    if (D->isAnonymousNamespace())
+      return nullptr;
+    // Get namespace symbols for the first non-inline parent.
+    if (D->isInlineNamespace())
+      if (auto *Parent = llvm::dyn_cast_or_null<NamespaceDecl>(D->getParent()))
+        return namespaceSymbols(Parent);
+    return NamespaceSymbols->lookup((D->getName() + "::").str());
+  }();
+  NamespaceCache.try_emplace(D, Result);
+  return Result;
+}
+
+llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
+  // If D is std::vector::iterator, `vector` is the outer symbol to look up.
+  // We keep all the candidate DCs as some may turn out to be anon enums.
+  // Do this resolution lazily as we may turn out not to have a std namespace.
+  llvm::SmallVector<const DeclContext *> IntermediateDecl;
+  const DeclContext *DC = D->getDeclContext();
+  while (DC && !DC->isNamespace()) {
+    if (NamedDecl::classofKind(DC->getDeclKind()))
+      IntermediateDecl.push_back(DC);
+    DC = DC->getParent();
+  }
+  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(DC));
+  if (!Symbols)
+    return llvm::None;
+
+  llvm::StringRef Name = [&]() -> llvm::StringRef {
+    for (const auto *SymDC : llvm::reverse(IntermediateDecl)) {
+      DeclarationName N = cast<NamedDecl>(SymDC)->getDeclName();
+      if (const auto *II = N.getAsIdentifierInfo())
+        return II->getName();
+      if (!N.isEmpty())
+        return ""; // e.g. operator<: give up
+    }
+    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
+      if (const auto *II = ND->getIdentifier())
+        return II->getName();
+    return "";
+  }();
+  if (Name.empty())
+    return llvm::None;
+
+  auto It = Symbols->find(Name);
+  if (It == Symbols->end())
+    return llvm::None;
+  return Symbol(It->second);
+}
+
+} // namespace stdlib
+} // namespace clang
Index: clang/lib/Tooling/Inclusions/CMakeLists.txt
===================================================================
--- clang/lib/Tooling/Inclusions/CMakeLists.txt
+++ clang/lib/Tooling/Inclusions/CMakeLists.txt
@@ -3,6 +3,7 @@
 add_clang_library(clangToolingInclusions
   HeaderIncludes.cpp
   IncludeStyle.cpp
+  StandardLibrary.cpp
 
   LINK_LIBS
   clangBasic
Index: clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
===================================================================
--- clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
+++ clang/include/clang/Tooling/Inclusions/StdSymbolMap.inc
@@ -3,7 +3,8 @@
 // Used to build a lookup table (qualified names => include headers) for CPP
 // Standard Library symbols.
 //
-// Automatically generated file, DO NOT EDIT!
+// This file was generated automatically by
+// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
 //
 // Generated from cppreference offline HTML book (modified on 2018-10-28).
 //===----------------------------------------------------------------------===//
Index: clang/include/clang/Tooling/Inclusions/StandardLibrary.h
===================================================================
--- /dev/null
+++ clang/include/clang/Tooling/Inclusions/StandardLibrary.h
@@ -0,0 +1,110 @@
+#include "clang/AST/Decl.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace clang {
+namespace stdlib {
+
+class Symbol;
+
+// A standard library header, such as <iostream>
+// Lightweight class, in fact just an index into a table.
+class Header {
+public:
+  static llvm::Optional<Header> named(llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
+    return OS << H.name();
+  }
+  llvm::StringRef name() const;
+
+private:
+  Header(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend Symbol;
+  friend llvm::DenseMapInfo<Header>;
+  friend bool operator==(const Header &L, const Header &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A top-level standard library symbol, such as std::vector
+// Lightweight class, in fact just an index into a table.
+class Symbol {
+public:
+  static llvm::Optional<Symbol> named(llvm::StringRef Scope,
+                                      llvm::StringRef Name);
+
+  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
+    return OS << S.scope() << S.name();
+  }
+  llvm::StringRef scope() const;
+  llvm::StringRef name() const;
+  // The preferred header for this symbol (e.g. the suggested insertion).
+  Header header() const;
+  // Some symbols may be provided by multiple headers.
+  llvm::SmallVector<Header> headers() const;
+
+private:
+  Symbol(unsigned ID) : ID(ID) {}
+  unsigned ID;
+  friend class Recognizer;
+  friend llvm::DenseMapInfo<Symbol>;
+  friend bool operator==(const Symbol &L, const Symbol &R) {
+    return L.ID == R.ID;
+  }
+};
+
+// A functor to find the stdlib::Symbol associated with a decl.
+//
+// For non-top-level decls (std::vector<int>::iterator), returns the top-level
+// symbol (std::vector).
+class Recognizer {
+public:
+  Recognizer();
+  llvm::Optional<Symbol> operator()(const Decl *D);
+
+private:
+  using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
+  NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
+  llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
+};
+
+} // namespace stdlib
+} // namespace clang
+
+namespace llvm {
+
+template <> struct DenseMapInfo<clang::stdlib::Header> {
+  static inline clang::stdlib::Header getEmptyKey() {
+    return clang::stdlib::Header(-1);
+  }
+  static inline clang::stdlib::Header getTombstoneKey() {
+    return clang::stdlib::Header(-2);
+  }
+  static unsigned getHashValue(const clang::stdlib::Header &H) {
+    return hash_value(H.ID);
+  }
+  static bool isEqual(const clang::stdlib::Header &LHS,
+                      const clang::stdlib::Header &RHS) {
+    return LHS == RHS;
+  }
+};
+
+template <> struct DenseMapInfo<clang::stdlib::Symbol> {
+  static inline clang::stdlib::Symbol getEmptyKey() {
+    return clang::stdlib::Symbol(-1);
+  }
+  static inline clang::stdlib::Symbol getTombstoneKey() {
+    return clang::stdlib::Symbol(-2);
+  }
+  static unsigned getHashValue(const clang::stdlib::Symbol &S) {
+    return hash_value(S.ID);
+  }
+  static bool isEqual(const clang::stdlib::Symbol &LHS,
+                      const clang::stdlib::Symbol &RHS) {
+    return LHS == RHS;
+  }
+};
+} // namespace llvm
Index: clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
===================================================================
--- clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
+++ clang/include/clang/Tooling/Inclusions/CSymbolMap.inc
@@ -3,7 +3,8 @@
 // Used to build a lookup table (qualified names => include headers) for C
 // Standard Library symbols.
 //
-// Automatically generated file, DO NOT EDIT!
+// This file was generated automatically by
+// clang/tools/include-mapping/gen_std.py, DO NOT EDIT!
 //
 // Generated from cppreference offline HTML book (modified on 2018-10-28).
 //===----------------------------------------------------------------------===//
Index: clang-tools-extra/clangd/unittests/HeadersTests.cpp
===================================================================
--- clang-tools-extra/clangd/unittests/HeadersTests.cpp
+++ clang-tools-extra/clangd/unittests/HeadersTests.cpp
@@ -409,22 +409,6 @@
   EXPECT_FALSE(Includes.isSelfContained(getID("pp_depend.h", Includes)));
 }
 
-TEST(StdlibTest, All) {
-  auto VectorH = stdlib::Header::named("<vector>");
-  EXPECT_TRUE(VectorH);
-  EXPECT_EQ(llvm::to_string(*VectorH), "<vector>");
-  EXPECT_FALSE(stdlib::Header::named("HeadersTests.cpp"));
-
-  auto Vector = stdlib::Symbol::named("std::", "vector");
-  EXPECT_TRUE(Vector);
-  EXPECT_EQ(llvm::to_string(*Vector), "std::vector");
-  EXPECT_FALSE(stdlib::Symbol::named("std::", "dongle"));
-  EXPECT_FALSE(stdlib::Symbol::named("clang::", "ASTContext"));
-
-  EXPECT_EQ(Vector->header(), *VectorH);
-  EXPECT_THAT(Vector->headers(), ElementsAre(*VectorH));
-}
-
 TEST(StdlibTest, Recognizer) {
   auto TU = TestTU::withCode(R"cpp(
     namespace std {
Index: clang-tools-extra/clangd/index/CanonicalIncludes.cpp
===================================================================
--- clang-tools-extra/clangd/index/CanonicalIncludes.cpp
+++ clang-tools-extra/clangd/index/CanonicalIncludes.cpp
@@ -85,7 +85,7 @@
   if (Language.CPlusPlus) {
     static const auto *Symbols = new llvm::StringMap<llvm::StringRef>({
 #define SYMBOL(Name, NameSpace, Header) {#NameSpace #Name, #Header},
-#include "StdSymbolMap.inc"
+#include "clang/Tooling/Inclusions/StdSymbolMap.inc"
         // There are two std::move()s, this is by far the most common.
         SYMBOL(move, std::, <utility>)
         // There are multiple headers for size_t, pick one.
@@ -96,7 +96,7 @@
   } else if (Language.C11) {
     static const auto *CSymbols = new llvm::StringMap<llvm::StringRef>({
 #define SYMBOL(Name, NameSpace, Header) {#Name, #Header},
-#include "CSymbolMap.inc"
+#include "clang/Tooling/Inclusions/CSymbolMap.inc"
         // There are multiple headers for size_t, pick one.
         SYMBOL(size_t, None, <stddef.h>)
 #undef SYMBOL
Index: clang-tools-extra/clangd/Headers.h
===================================================================
--- clang-tools-extra/clangd/Headers.h
+++ clang-tools-extra/clangd/Headers.h
@@ -22,6 +22,7 @@
 #include "clang/Lex/PPCallbacks.h"
 #include "clang/Lex/Preprocessor.h"
 #include "clang/Tooling/Inclusions/HeaderIncludes.h"
+#include "clang/Tooling/Inclusions/StandardLibrary.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/StringRef.h"
@@ -32,79 +33,8 @@
 #include <string>
 
 namespace clang {
-class Decl;
-class NamespaceDecl;
 namespace clangd {
 
-// clangd has a built-in database of standard library symbols.
-namespace stdlib {
-class Symbol;
-
-// A standard library header, such as <iostream>
-// Lightweight class, in fact just an index into a table.
-class Header {
-public:
-  static llvm::Optional<Header> named(llvm::StringRef Name);
-
-  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
-    return OS << H.name();
-  }
-  llvm::StringRef name() const;
-
-private:
-  Header(unsigned ID) : ID(ID) {}
-  unsigned ID;
-  friend Symbol;
-  friend llvm::DenseMapInfo<Header>;
-  friend bool operator==(const Header &L, const Header &R) {
-    return L.ID == R.ID;
-  }
-};
-
-// A top-level standard library symbol, such as std::vector
-// Lightweight class, in fact just an index into a table.
-class Symbol {
-public:
-  static llvm::Optional<Symbol> named(llvm::StringRef Scope,
-                                      llvm::StringRef Name);
-
-  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
-    return OS << S.scope() << S.name();
-  }
-  llvm::StringRef scope() const;
-  llvm::StringRef name() const;
-  // The preferred header for this symbol (e.g. the suggested insertion).
-  Header header() const;
-  // Some symbols may be provided my multiple headers.
-  llvm::SmallVector<Header> headers() const;
-
-private:
-  Symbol(unsigned ID) : ID(ID) {}
-  unsigned ID;
-  friend class Recognizer;
-  friend llvm::DenseMapInfo<Symbol>;
-  friend bool operator==(const Symbol &L, const Symbol &R) {
-    return L.ID == R.ID;
-  }
-};
-
-// A functor to find the stdlib::Symbol associated with a decl.
-//
-// For non-top-level decls (std::vector<int>::iterator), returns the top-level
-// symbol (std::vector).
-class Recognizer {
-public:
-  Recognizer();
-  llvm::Optional<Symbol> operator()(const Decl *D);
-
-private:
-  using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
-  NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
-  llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
-};
-
-} // namespace stdlib
-
 /// Returns true if \p Include is literal include like "path" or <path>.
 bool isLiteralInclude(llvm::StringRef Include);
 
@@ -341,38 +271,6 @@
   }
 };
 
-template <> struct DenseMapInfo<clang::clangd::stdlib::Header> {
-  static inline clang::clangd::stdlib::Header getEmptyKey() {
-    return clang::clangd::stdlib::Header(-1);
-  }
-  static inline clang::clangd::stdlib::Header getTombstoneKey() {
-    return clang::clangd::stdlib::Header(-2);
-  }
-  static unsigned getHashValue(const clang::clangd::stdlib::Header &H) {
-    return hash_value(H.ID);
-  }
-  static bool isEqual(const clang::clangd::stdlib::Header &LHS,
-                      const clang::clangd::stdlib::Header &RHS) {
-    return LHS == RHS;
-  }
-};
-
-template <> struct DenseMapInfo<clang::clangd::stdlib::Symbol> {
-  static inline clang::clangd::stdlib::Symbol getEmptyKey() {
-    return clang::clangd::stdlib::Symbol(-1);
-  }
-  static inline clang::clangd::stdlib::Symbol getTombstoneKey() {
-    return clang::clangd::stdlib::Symbol(-2);
-  }
-  static unsigned getHashValue(const clang::clangd::stdlib::Symbol &S) {
-    return hash_value(S.ID);
-  }
-  static bool isEqual(const clang::clangd::stdlib::Symbol &LHS,
-                      const clang::clangd::stdlib::Symbol &RHS) {
-    return LHS == RHS;
-  }
-};
-
 } // namespace llvm
 
 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H
Index: clang-tools-extra/clangd/Headers.cpp
===================================================================
--- clang-tools-extra/clangd/Headers.cpp
+++ clang-tools-extra/clangd/Headers.cpp
@@ -350,154 +350,5 @@
                   RHS.Resolved, RHS.Written);
 }
 
-namespace stdlib {
-static llvm::StringRef *HeaderNames;
-static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
-static unsigned *SymbolHeaderIDs;
-static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
-// Maps symbol name -> Symbol::ID, within a namespace.
-using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
-static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
-
-static int initialize() {
-  unsigned SymCount = 0;
-#define SYMBOL(Name, NS, Header) ++SymCount;
-#include "CSymbolMap.inc"
-#include "StdSymbolMap.inc"
-#undef SYMBOL
-  SymbolNames = new std::remove_reference_t<decltype(*SymbolNames)>[SymCount];
-  SymbolHeaderIDs =
-      new std::remove_reference_t<decltype(*SymbolHeaderIDs)>[SymCount];
-  NamespaceSymbols = new std::remove_reference_t<decltype(*NamespaceSymbols)>;
-  HeaderIDs = new std::remove_reference_t<decltype(*HeaderIDs)>;
-
-  auto AddNS = [&](llvm::StringRef NS) -> NSSymbolMap & {
-    auto R = NamespaceSymbols->try_emplace(NS, nullptr);
-    if (R.second)
-      R.first->second = new NSSymbolMap();
-    return *R.first->second;
-  };
-
-  auto AddHeader = [&](llvm::StringRef Header) -> unsigned {
-    return HeaderIDs->try_emplace(Header, HeaderIDs->size()).first->second;
-  };
-
-  auto Add = [&, SymIndex(0)](llvm::StringRef Name, llvm::StringRef NS,
-                              llvm::StringRef HeaderName) mutable {
-    if (NS == "None")
-      NS = "";
-
-    SymbolNames[SymIndex] = {NS, Name};
-    SymbolHeaderIDs[SymIndex] = AddHeader(HeaderName);
-
-    NSSymbolMap &NSSymbols = AddNS(NS);
-    NSSymbols.try_emplace(Name, SymIndex);
-
-    ++SymIndex;
-  };
-#define SYMBOL(Name, NS, Header) Add(#Name, #NS, #Header);
-#include "CSymbolMap.inc"
-#include "StdSymbolMap.inc"
-#undef SYMBOL
-
-  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
-  for (const auto &E : *HeaderIDs)
-    HeaderNames[E.second] = E.first;
-
-  return 0;
-}
-
-static void ensureInitialized() {
-  static int Dummy = initialize();
-  (void)Dummy;
-}
-
-llvm::Optional<Header> Header::named(llvm::StringRef Name) {
-  ensureInitialized();
-  auto It = HeaderIDs->find(Name);
-  if (It == HeaderIDs->end())
-    return llvm::None;
-  return Header(It->second);
-}
-llvm::StringRef Header::name() const { return HeaderNames[ID]; }
-llvm::StringRef Symbol::scope() const { return SymbolNames[ID].first; }
-llvm::StringRef Symbol::name() const { return SymbolNames[ID].second; }
-llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
-                                     llvm::StringRef Name) {
-  ensureInitialized();
-  if (NSSymbolMap *NSSymbols = NamespaceSymbols->lookup(Scope)) {
-    auto It = NSSymbols->find(Name);
-    if (It != NSSymbols->end())
-      return Symbol(It->second);
-  }
-  return llvm::None;
-}
-Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
-llvm::SmallVector<Header> Symbol::headers() const {
-  return {header()}; // FIXME: multiple in case of ambiguity
-}
-
-Recognizer::Recognizer() { ensureInitialized(); }
-
-NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
-  auto It = NamespaceCache.find(D);
-  if (It != NamespaceCache.end())
-    return It->second;
-
-  NSSymbolMap *Result = [&]() -> NSSymbolMap * {
-    if (!D) // Nullptr means the global namespace
-      return NamespaceSymbols->lookup("");
-    if (D->isAnonymousNamespace())
-      return nullptr;
-    if (D->isInlineNamespace()) {
-      if (auto *Parent = llvm::dyn_cast_or_null<NamespaceDecl>(D->getParent()))
-        return namespaceSymbols(Parent);
-      return nullptr;
-    }
-    return NamespaceSymbols->lookup(printNamespaceScope(*D));
-  }();
-  NamespaceCache.try_emplace(D, Result);
-  return Result;
-}
-
-llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
-  // If D is std::vector::iterator, `vector` is the outer symbol to look up.
-  // We keep all the candidate DCs as some may turn out to be anon enums.
-  // Do this resolution lazily as we may turn out not to have a std namespace.
-  llvm::SmallVector<const DeclContext *> IntermediateDecl;
-  const DeclContext *DC = D->getDeclContext();
-  while (DC && !DC->isNamespace()) {
-    if (NamedDecl::classofKind(DC->getDeclKind()))
-      IntermediateDecl.push_back(DC);
-    DC = DC->getParent();
-  }
-  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(DC));
-  if (!Symbols)
-    return llvm::None;
-
-  llvm::StringRef Name = [&]() -> llvm::StringRef {
-    for (const auto *SymDC : llvm::reverse(IntermediateDecl)) {
-      DeclarationName N = cast<NamedDecl>(SymDC)->getDeclName();
-      if (const auto *II = N.getAsIdentifierInfo())
-        return II->getName();
-      if (!N.isEmpty())
-        return ""; // e.g. operator<: give up
-    }
-    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
-      if (const auto *II = ND->getIdentifier())
-        return II->getName();
-    return "";
-  }();
-  if (Name.empty())
-    return llvm::None;
-
-  auto It = Symbols->find(Name);
-  if (It == Symbols->end())
-    return llvm::None;
-  return Symbol(It->second);
-}
-
-} // namespace stdlib
-
 } // namespace clangd
 } // namespace clang
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to