sammccall created this revision.
sammccall added a reviewer: adamcz.
Herald added subscribers: kadircet, arphaman.
Herald added a project: All.
sammccall requested review of this revision.
Herald added subscribers: cfe-commits, MaskRay, ilya-biryukov.
Herald added a project: clang-tools-extra.

Production profiles show that generateProximityURIs is roughly 3.8% of
buildPreamble. Of this, the majority (3% of buildPreamble) is parsing
and reserializing URIs.

We can do this with ugly string manipulation instead.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D135226

Files:
  clang-tools-extra/clangd/index/dex/Dex.cpp
  clang-tools-extra/clangd/index/dex/Dex.h

Index: clang-tools-extra/clangd/index/dex/Dex.h
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.h
+++ clang-tools-extra/clangd/index/dex/Dex.h
@@ -132,7 +132,7 @@
 /// Should be used within the index build process.
 ///
 /// This function is exposed for testing only.
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
+llvm::SmallVector<llvm::StringRef, 5> generateProximityURIs(const char *);
 
 } // namespace dex
 } // namespace clangd
Index: clang-tools-extra/clangd/index/dex/Dex.cpp
===================================================================
--- clang-tools-extra/clangd/index/dex/Dex.cpp
+++ clang-tools-extra/clangd/index/dex/Dex.cpp
@@ -163,8 +163,8 @@
   llvm::StringMap<SourceParams> Sources;
   for (const auto &Path : ProximityPaths) {
     Sources[Path] = SourceParams();
-    auto PathURI = URI::create(Path);
-    const auto PathProximityURIs = generateProximityURIs(PathURI.toString());
+    auto PathURI = URI::create(Path).toString();
+    const auto PathProximityURIs = generateProximityURIs(PathURI.c_str());
     for (const auto &ProximityURI : PathProximityURIs)
       ParentURIs.insert(ProximityURI);
   }
@@ -353,30 +353,59 @@
   return Bytes + BackingDataSize;
 }
 
-std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath) {
-  std::vector<std::string> Result;
-  auto ParsedURI = URI::parse(URIPath);
-  assert(ParsedURI &&
-         "Non-empty argument of generateProximityURIs() should be a valid "
-         "URI.");
-  llvm::StringRef Body = ParsedURI->body();
-  // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
-  // size of resulting vector. Some projects might want to have higher limit if
-  // the file hierarchy is deeper. For the generic case, it would be useful to
-  // calculate Limit in the index build stage by calculating the maximum depth
-  // of the project source tree at runtime.
-  size_t Limit = 5;
-  // Insert original URI before the loop: this would save a redundant iteration
-  // with a URI parse.
-  Result.emplace_back(ParsedURI->toString());
-  while (!Body.empty() && --Limit > 0) {
-    // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and
-    // could be optimized.
-    Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
-    if (!Body.empty())
-      Result.emplace_back(
-          URI(ParsedURI->scheme(), ParsedURI->authority(), Body).toString());
+// Given foo://bar/one/two
+// Returns        ^
+const char *findPathInURI(const char *S) {
+  // Skip over scheme.
+  for (;;) {
+    if (!*S)
+      return S;
+    if (*S++ == ':')
+      break;
   }
+  // Skip over authority.
+  if (*S == '/' && *(S+1) == '/') {
+    S += 2;
+    while (*S && *S != '/')
+      ++S;
+  }
+  return S;
+}
+
+// FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum
+// size of resulting vector. Some projects might want to have higher limit if
+// the file hierarchy is deeper. For the generic case, it would be useful to
+// calculate Limit in the index build stage by calculating the maximum depth
+// of the project source tree at runtime.
+constexpr unsigned ProximityURILimit = 5;
+
+llvm::SmallVector<llvm::StringRef, ProximityURILimit>
+generateProximityURIs(const char *URI) {
+  // This function is hot when indexing, so don't parse/reserialize URIPath,
+  // just emit substrings of it instead.
+  //
+  // foo://bar/one/two
+  // ^URI     ^Path   ^End
+  const char *Path = findPathInURI(URI);
+  const char *End = Path;
+  while (*End)
+    ++End;
+  // The original URI is a proximity path.
+  llvm::SmallVector<llvm::StringRef, ProximityURILimit> Result = {
+      StringRef(URI, End - URI)};
+  unsigned Limit = ProximityURILimit - 1;
+  while (--End != Path) { // foo://bar is not a proximity path.
+    if (*End == '/') {
+      // foo://bar/one/two
+      //              ^End
+      Result.push_back(StringRef(URI, End - URI));
+      if (--Limit == 0)
+        return Result;
+    }
+  }
+  // The root foo://bar/ is a proximity path.
+  if (*Path == '/')
+    Result.push_back(StringRef(URI, Path + 1 - URI));
   return Result;
 }
 
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to