https://github.com/ilovepi updated 
https://github.com/llvm/llvm-project/pull/190045

>From 7f11bd41a15419ac3c61766517a2576d08e7c395 Mon Sep 17 00:00:00 2001
From: Paul Kirth <[email protected]>
Date: Wed, 18 Mar 2026 01:09:48 +0000
Subject: [PATCH] [clang-doc] Introduce TransientArena for short lived
 allocations

With strings interned, we can move the StringRefs in various Info
structs into a new short-lived arena. This change migrates the remaining
SmallVectors in CommentInfo to use an ArrayRef backed by the new
transient arena.

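This results in further minor reductions in overall memory usage, but has
no significant effect on runtime performance.

In the tables below, Culm% is the change relative to Baseline and Seq% is
the change relative to Prev.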

| Metric | Baseline | Prev | This | Culm% | Seq% |
| :--- | :--- | :--- | :--- | :--- | :--- |
| Time | 920.5s | 1011.0s | 1005.7s | +9.2% | -0.5% |
| Memory | 86.0G | 44.9G | 42.1G | -51.0% | -6.2% |

| Benchmark | Baseline | Prev | This | Culm% | Seq% |
| :--- | :--- | :--- | :--- | :--- | :--- |
| BM_BitcodeReader_Scale/10 | 67.9us | 70.0us | 68.6us | +1.0% | -2.0% |
| BM_BitcodeReader_Scale/10000 | 70.5ms | 21.3ms | 21.3ms | -69.8% | -0.0% |
| BM_BitcodeReader_Scale/4096 | 23.2ms | 4.5ms | 4.6ms | -80.2% | +2.8% |
| BM_BitcodeReader_Scale/512 | 509.4us | 538.8us | 546.3us | +7.3% | +1.4% |
| BM_BitcodeReader_Scale/64 | 114.8us | 118.0us | 117.9us | +2.7% | -0.1% |
| BM_EmitInfoFunction | 1.6us | 1.6us | 1.5us | -5.5% | -6.2% |
| BM_Index_Insertion/10 | 2.3us | 4.0us | 3.9us | +70.3% | -0.7% |
| BM_Index_Insertion/10000 | 3.1ms | 5.0ms | 5.3ms | +68.6% | +5.0% |
| BM_Index_Insertion/4096 | 1.3ms | 2.0ms | 2.1ms | +64.2% | +4.5% |
| BM_Index_Insertion/512 | 153.6us | 245.0us | 253.0us | +64.8% | +3.2% |
| BM_Index_Insertion/64 | 18.1us | 28.9us | 30.1us | +67.0% | +4.4% |
| BM_JSONGenerator_Scale/10 | 36.8us | 36.4us | 37.0us | +0.4% | +1.7% |
| BM_JSONGenerator_Scale/10000 | 89.6ms | 90.4ms | 91.7ms | +2.3% | +1.5% |
| BM_JSONGenerator_Scale/4096 | 33.7ms | 34.0ms | 35.1ms | +4.0% | +3.0% |
| BM_JSONGenerator_Scale/64 | 222.4us | 220.5us | 223.3us | +0.4% | +1.3% |
| BM_Mapper_Scale/10000 | 104.3ms | 105.4ms | 105.6ms | +1.3% | +0.3% |
| BM_Mapper_Scale/4096 | 44.3ms | 44.7ms | 44.8ms | +1.0% | +0.1% |
| BM_Mapper_Scale/512 | 7.6ms | 7.7ms | 7.6ms | +0.7% | -1.2% |
| BM_MergeInfos_Scale/10000 | 12.2ms | 1.4ms | 1.4ms | -88.2% | +0.1% |
| BM_MergeInfos_Scale/2 | 1.9us | 1.7us | 1.7us | -8.5% | +2.1% |
| BM_MergeInfos_Scale/4096 | 2.8ms | 495.6us | 487.3us | -82.5% | -1.7% |
| BM_MergeInfos_Scale/512 | 68.9us | 34.6us | 38.7us | -43.9% | +11.6% |
| BM_MergeInfos_Scale/64 | 10.3us | 6.0us | 6.4us | -37.4% | +7.2% |
| BM_MergeInfos_Scale/8 | 2.8us | 2.1us | 2.2us | -20.6% | +5.1% |
| BM_SerializeFunctionInfo | 25.5us | 26.8us | 25.9us | +1.4% | -3.3% |
---
 clang-tools-extra/clang-doc/BitcodeReader.cpp | 85 ++++++++++++++++---
 .../clang-doc/Representation.cpp              |  4 +
 clang-tools-extra/clang-doc/Representation.h  | 19 ++++-
 clang-tools-extra/clang-doc/Serialize.cpp     | 34 +++++++-
 clang-tools-extra/clang-doc/YAMLGenerator.cpp | 15 ----
 .../clang-doc/tool/ClangDocMain.cpp           |  4 +-
 .../unittests/clang-doc/BitcodeTest.cpp       | 11 ++-
 .../unittests/clang-doc/MDGeneratorTest.cpp   | 11 ++-
 .../unittests/clang-doc/YAMLGeneratorTest.cpp | 11 ++-
 9 files changed, 153 insertions(+), 41 deletions(-)

diff --git a/clang-tools-extra/clang-doc/BitcodeReader.cpp 
b/clang-tools-extra/clang-doc/BitcodeReader.cpp
index 469a6e73eb42f..27984cf321b46 100644
--- a/clang-tools-extra/clang-doc/BitcodeReader.cpp
+++ b/clang-tools-extra/clang-doc/BitcodeReader.cpp
@@ -129,13 +129,6 @@ static llvm::Error decodeRecord(const Record &R, FieldId 
&Field,
                                  "invalid value for FieldId");
 }
 
-static llvm::Error decodeRecord(const Record &R,
-                                llvm::SmallVectorImpl<llvm::StringRef> &Field,
-                                llvm::StringRef Blob) {
-  Field.push_back(internString(Blob));
-  return llvm::Error::success();
-}
-
 static llvm::Error decodeRecord(const Record &R,
                                 llvm::SmallVectorImpl<Location> &Field,
                                 llvm::StringRef Blob) {
@@ -346,7 +339,10 @@ static llvm::Error parseRecord(const Record &R, unsigned 
ID,
 }
 
 static llvm::Error parseRecord(const Record &R, unsigned ID,
-                               llvm::StringRef Blob, CommentInfo *I) {
+                               llvm::StringRef Blob, CommentInfo *I,
+                               llvm::SmallVectorImpl<StringRef> &AttrKeys,
+                               llvm::SmallVectorImpl<StringRef> &AttrValues,
+                               llvm::SmallVectorImpl<StringRef> &Args) {
   llvm::SmallString<16> KindStr;
   switch (ID) {
   case COMMENT_KIND:
@@ -365,11 +361,14 @@ static llvm::Error parseRecord(const Record &R, unsigned 
ID,
   case COMMENT_CLOSENAME:
     return decodeRecord(R, I->CloseName, Blob);
   case COMMENT_ATTRKEY:
-    return decodeRecord(R, I->AttrKeys, Blob);
+    AttrKeys.push_back(internString(Blob));
+    return llvm::Error::success();
   case COMMENT_ATTRVAL:
-    return decodeRecord(R, I->AttrValues, Blob);
+    AttrValues.push_back(internString(Blob));
+    return llvm::Error::success();
   case COMMENT_ARG:
-    return decodeRecord(R, I->Args, Blob);
+    Args.push_back(internString(Blob));
+    return llvm::Error::success();
   case COMMENT_SELFCLOSING:
     return decodeRecord(R, I->SelfClosing, Blob);
   case COMMENT_EXPLICIT:
@@ -380,6 +379,70 @@ static llvm::Error parseRecord(const Record &R, unsigned 
ID,
   }
 }
 
+template <>
+llvm::Error ClangDocBitcodeReader::readBlock(unsigned ID, CommentInfo *I) {
+  llvm::TimeTraceScope TS("Reducing infos", "readBlock");
+  if (llvm::Error Err = Stream.EnterSubBlock(ID))
+    return Err;
+
+  llvm::SmallVector<StringRef> AttrKeys; // scratch; copied out at BlockEnd
+  llvm::SmallVector<StringRef> AttrValues;
+  llvm::SmallVector<StringRef> Args;
+
+  while (true) {
+    unsigned BlockOrCode = 0;
+    llvm::Expected<Cursor> C = skipUntilRecordOrBlock(BlockOrCode);
+    if (!C)
+      return C.takeError();
+
+    switch (*C) {
+    case Cursor::BadBlock:
+      return llvm::createStringError(llvm::inconvertibleErrorCode(),
+                                     "bad block found");
+    case Cursor::BlockEnd: { // publish non-empty lists through the arena
+      if (!AttrKeys.empty()) {
+        StringRef *KeysMem =
+            TransientArena.Allocate<StringRef>(AttrKeys.size());
+        std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
+        I->AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
+      }
+      if (!AttrValues.empty()) {
+        StringRef *ValuesMem =
+            TransientArena.Allocate<StringRef>(AttrValues.size());
+        std::uninitialized_copy(AttrValues.begin(), AttrValues.end(),
+                                ValuesMem);
+        I->AttrValues = llvm::ArrayRef<StringRef>(ValuesMem, AttrValues.size());
+      }
+      if (!Args.empty()) {
+        StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
+        std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
+        I->Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
+      }
+      return llvm::Error::success();
+    }
+    case Cursor::BlockBegin:
+      if (llvm::Error Err = readSubBlock(BlockOrCode, I)) {
+        if (llvm::Error Skipped = Stream.SkipBlock())
+          return joinErrors(std::move(Err), std::move(Skipped));
+        return Err;
+      }
+      continue;
+    case Cursor::Record: // fall out of the switch to decode the record
+      break;
+    }
+
+    Record R;
+    llvm::StringRef Blob;
+    llvm::Expected<unsigned> MaybeRecID =
+        Stream.readRecord(BlockOrCode, R, &Blob);
+    if (!MaybeRecID)
+      return MaybeRecID.takeError();
+    if (llvm::Error Err = parseRecord(R, MaybeRecID.get(), Blob, I, AttrKeys,
+                                      AttrValues, Args))
+      return Err;
+  }
+}
+
 static llvm::Error parseRecord(const Record &R, unsigned ID,
                                llvm::StringRef Blob, Reference *I, FieldId &F) 
{
   switch (ID) {
diff --git a/clang-tools-extra/clang-doc/Representation.cpp 
b/clang-tools-extra/clang-doc/Representation.cpp
index 7a8be5a06e998..0467e375d8cc9 100644
--- a/clang-tools-extra/clang-doc/Representation.cpp
+++ b/clang-tools-extra/clang-doc/Representation.cpp
@@ -27,6 +27,10 @@
 namespace clang {
 namespace doc {
 
+// Thread local arenas usable in each thread pool
+thread_local llvm::BumpPtrAllocator TransientArena;
+thread_local llvm::BumpPtrAllocator PersistentArena;
+
 ConcurrentStringPool &getGlobalStringPool() {
   static ConcurrentStringPool GlobalPool;
   return GlobalPool;
diff --git a/clang-tools-extra/clang-doc/Representation.h 
b/clang-tools-extra/clang-doc/Representation.h
index 63af0abe564f4..4d5b6c92c9d90 100644
--- a/clang-tools-extra/clang-doc/Representation.h
+++ b/clang-tools-extra/clang-doc/Representation.h
@@ -49,6 +49,8 @@ class ConcurrentStringPool {
 
 ConcurrentStringPool &getGlobalStringPool();
 
+extern thread_local llvm::BumpPtrAllocator TransientArena;
+
 inline StringRef internString(const Twine &T) {
   if (T.isTriviallyEmpty())
     return StringRef();
@@ -67,6 +69,16 @@ inline StringRef internString(const Twine &T) {
   return getGlobalStringPool().intern(S);
 }
 
+template <typename T>
+inline llvm::ArrayRef<T> allocateArray(llvm::ArrayRef<T> V,
+                                       llvm::BumpPtrAllocator &Alloc) {
+  if (V.empty()) // nothing to copy, so no allocation is requested
+    return llvm::ArrayRef<T>();
+  T *Allocated = Alloc.Allocate<T>(V.size());
+  std::uninitialized_copy(V.begin(), V.end(), Allocated); // V is const
+  return llvm::ArrayRef<T>(Allocated, V.size());
+}
+
 // An abstraction for owned pointers. Initially mapped to OwnedPtr,
 // to be eventually transitioned to bare pointers in an arena.
 template <typename T> using OwnedPtr = std::unique_ptr<T>;
@@ -167,11 +179,10 @@ struct CommentInfo {
   StringRef ParamName;       // Parameter name (for (T)ParamCommand).
   StringRef CloseName;       // Closing tag name (for VerbatimBlock).
   StringRef Text;            // Text of the comment.
-  llvm::SmallVector<StringRef, 4>
-      AttrKeys; // List of attribute keys (for HTML).
-  llvm::SmallVector<StringRef, 4>
+  llvm::ArrayRef<StringRef> AttrKeys; // List of attribute keys (for HTML).
+  llvm::ArrayRef<StringRef>
       AttrValues; // List of attribute values for each key (for HTML).
-  llvm::SmallVector<StringRef, 4>
+  llvm::ArrayRef<StringRef>
       Args; // List of arguments to commands (for InlineCommand).
   CommentKind Kind = CommentKind::
       CK_Unknown; // Kind of comment (FullComment, ParagraphComment,
diff --git a/clang-tools-extra/clang-doc/Serialize.cpp 
b/clang-tools-extra/clang-doc/Serialize.cpp
index cf6d5325178d1..023e21dc763b1 100644
--- a/clang-tools-extra/clang-doc/Serialize.cpp
+++ b/clang-tools-extra/clang-doc/Serialize.cpp
@@ -244,18 +244,38 @@ void ClangDocCommentVisitor::visitTextComment(const 
TextComment *C) {
 void ClangDocCommentVisitor::visitInlineCommandComment(
     const InlineCommandComment *C) {
   CurrentCI.Name = internString(getCommandName(C->getCommandID()));
+  llvm::SmallVector<StringRef> Args;
   for (unsigned I = 0, E = C->getNumArgs(); I != E; ++I)
-    CurrentCI.Args.push_back(internString(C->getArgText(I).trim()));
+    Args.push_back(internString(C->getArgText(I).trim()));
+  if (!Args.empty()) {
+    StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
+    std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
+    CurrentCI.Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
+  }
 }
 
 void ClangDocCommentVisitor::visitHTMLStartTagComment(
     const HTMLStartTagComment *C) {
   CurrentCI.Name = internString(C->getTagName());
   CurrentCI.SelfClosing = C->isSelfClosing();
+  llvm::SmallVector<StringRef> AttrKeys;
+  llvm::SmallVector<StringRef> AttrValues;
   for (unsigned I = 0, E = C->getNumAttrs(); I < E; ++I) {
     const HTMLStartTagComment::Attribute &Attr = C->getAttr(I);
-    CurrentCI.AttrKeys.push_back(internString(Attr.Name));
-    CurrentCI.AttrValues.push_back(internString(Attr.Value));
+    AttrKeys.push_back(internString(Attr.Name));
+    AttrValues.push_back(internString(Attr.Value));
+  }
+  if (!AttrKeys.empty()) {
+    StringRef *KeysMem = TransientArena.Allocate<StringRef>(AttrKeys.size());
+    std::uninitialized_copy(AttrKeys.begin(), AttrKeys.end(), KeysMem);
+    CurrentCI.AttrKeys = llvm::ArrayRef<StringRef>(KeysMem, AttrKeys.size());
+  }
+  if (!AttrValues.empty()) {
+    StringRef *ValuesMem =
+        TransientArena.Allocate<StringRef>(AttrValues.size());
+    std::uninitialized_copy(AttrValues.begin(), AttrValues.end(), ValuesMem);
+    CurrentCI.AttrValues =
+        llvm::ArrayRef<StringRef>(ValuesMem, AttrValues.size());
   }
 }
 
@@ -268,8 +288,14 @@ void ClangDocCommentVisitor::visitHTMLEndTagComment(
 void ClangDocCommentVisitor::visitBlockCommandComment(
     const BlockCommandComment *C) {
   CurrentCI.Name = internString(getCommandName(C->getCommandID()));
+  llvm::SmallVector<StringRef> Args;
   for (unsigned I = 0, E = C->getNumArgs(); I < E; ++I)
-    CurrentCI.Args.push_back(internString(C->getArgText(I).trim()));
+    Args.push_back(internString(C->getArgText(I).trim()));
+  if (!Args.empty()) {
+    StringRef *ArgsMem = TransientArena.Allocate<StringRef>(Args.size());
+    std::uninitialized_copy(Args.begin(), Args.end(), ArgsMem);
+    CurrentCI.Args = llvm::ArrayRef<StringRef>(ArgsMem, Args.size());
+  }
 }
 
 void ClangDocCommentVisitor::visitParamCommandComment(
diff --git a/clang-tools-extra/clang-doc/YAMLGenerator.cpp 
b/clang-tools-extra/clang-doc/YAMLGenerator.cpp
index 1a9d144d499bd..390d225bded03 100644
--- a/clang-tools-extra/clang-doc/YAMLGenerator.cpp
+++ b/clang-tools-extra/clang-doc/YAMLGenerator.cpp
@@ -240,11 +240,6 @@ static void commentInfoMapping(IO &IO, CommentInfo &I) {
       QArgs.push_back(QuotedString(S));
   }
   IO.mapOptional("Args", QArgs, std::vector<QuotedString>());
-  if (!IO.outputting()) {
-    I.Args.clear();
-    for (auto &Q : QArgs)
-      I.Args.push_back(Q.Ref);
-  }
 
   std::vector<QuotedString> QAttrKeys;
   if (IO.outputting()) {
@@ -252,11 +247,6 @@ static void commentInfoMapping(IO &IO, CommentInfo &I) {
       QAttrKeys.push_back(QuotedString(S));
   }
   IO.mapOptional("AttrKeys", QAttrKeys, std::vector<QuotedString>());
-  if (!IO.outputting()) {
-    I.AttrKeys.clear();
-    for (auto &Q : QAttrKeys)
-      I.AttrKeys.push_back(Q.Ref);
-  }
 
   std::vector<QuotedString> QAttrValues;
   if (IO.outputting()) {
@@ -264,11 +254,6 @@ static void commentInfoMapping(IO &IO, CommentInfo &I) {
       QAttrValues.push_back(QuotedString(S));
   }
   IO.mapOptional("AttrValues", QAttrValues, std::vector<QuotedString>());
-  if (!IO.outputting()) {
-    I.AttrValues.clear();
-    for (auto &Q : QAttrValues)
-      I.AttrValues.push_back(Q.Ref);
-  }
 
   IO.mapOptional("Children", I.Children);
 }
diff --git a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp 
b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
index 8ea1aa4b9e89f..f627ee5887528 100644
--- a/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
+++ b/clang-tools-extra/clang-doc/tool/ClangDocMain.cpp
@@ -351,8 +351,10 @@ Example usage for a project using a compile commands 
database:
         DiagnosticsEngine::Error, "error reading bitcode: %0");
     unsigned DiagIDBitcodeMerging = Diags.getCustomDiagID(
         DiagnosticsEngine::Error, "error merging bitcode: %0");
-    // ExecutorConcurrency is a flag exposed by AllTUsExecution.h
+    // Note: we use per-thread arenas, so Pool must outlive the last use of 
this
+    // memory in the generators.
     llvm::DefaultThreadPool Pool(
+        // ExecutorConcurrency is a flag exposed by AllTUsExecution.h
         llvm::hardware_concurrency(ExecutorConcurrency));
     {
       llvm::TimeTraceScope TS("Reduce");
diff --git a/clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp 
b/clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp
index 3688c1e04b1e5..fa46f79081e80 100644
--- a/clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp
+++ b/clang-tools-extra/unittests/clang-doc/BitcodeTest.cpp
@@ -265,8 +265,15 @@ TEST_F(BitcodeTest, emitInfoWithCommentBitcode) {
   HTML->Children.emplace_back(allocatePtr<CommentInfo>());
   HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
   HTML->Children.back()->Name = "ul";
-  HTML->Children.back()->AttrKeys.emplace_back("class");
-  HTML->Children.back()->AttrValues.emplace_back("test");
+  {
+    llvm::SmallVector<StringRef, 1> Keys = {"class"};
+    HTML->Children.back()->AttrKeys =
+        allocateArray<StringRef>(Keys, TransientArena);
+
+    llvm::SmallVector<StringRef, 1> Values = {"test"};
+    HTML->Children.back()->AttrValues =
+        allocateArray<StringRef>(Values, TransientArena);
+  }
   HTML->Children.emplace_back(allocatePtr<CommentInfo>());
   HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
   HTML->Children.back()->Name = "li";
diff --git a/clang-tools-extra/unittests/clang-doc/MDGeneratorTest.cpp 
b/clang-tools-extra/unittests/clang-doc/MDGeneratorTest.cpp
index aec8a1bc288e4..0f6c0eaa00c21 100644
--- a/clang-tools-extra/unittests/clang-doc/MDGeneratorTest.cpp
+++ b/clang-tools-extra/unittests/clang-doc/MDGeneratorTest.cpp
@@ -247,8 +247,15 @@ TEST_F(MDGeneratorTest, emitCommentMD) {
   HTML->Children.emplace_back(allocatePtr<CommentInfo>());
   HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
   HTML->Children.back()->Name = "ul";
-  HTML->Children.back()->AttrKeys.emplace_back("class");
-  HTML->Children.back()->AttrValues.emplace_back("test");
+  {
+    llvm::SmallVector<StringRef, 1> Keys = {"class"};
+    HTML->Children.back()->AttrKeys =
+        allocateArray<StringRef>(Keys, TransientArena);
+
+    llvm::SmallVector<StringRef, 1> Values = {"test"};
+    HTML->Children.back()->AttrValues =
+        allocateArray<StringRef>(Values, TransientArena);
+  }
   HTML->Children.emplace_back(allocatePtr<CommentInfo>());
   HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
   HTML->Children.back()->Name = "li";
diff --git a/clang-tools-extra/unittests/clang-doc/YAMLGeneratorTest.cpp 
b/clang-tools-extra/unittests/clang-doc/YAMLGeneratorTest.cpp
index 554fe56b41cac..ecc2c970f18bc 100644
--- a/clang-tools-extra/unittests/clang-doc/YAMLGeneratorTest.cpp
+++ b/clang-tools-extra/unittests/clang-doc/YAMLGeneratorTest.cpp
@@ -411,8 +411,15 @@ TEST_F(YAMLGeneratorTest, emitCommentYAML) {
   HTML->Children.emplace_back(allocatePtr<CommentInfo>());
   HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
   HTML->Children.back()->Name = "ul";
-  HTML->Children.back()->AttrKeys.emplace_back("class");
-  HTML->Children.back()->AttrValues.emplace_back("test");
+  {
+    llvm::SmallVector<StringRef, 1> Keys = {"class"};
+    HTML->Children.back()->AttrKeys =
+        allocateArray<StringRef>(Keys, TransientArena);
+
+    llvm::SmallVector<StringRef, 1> Values = {"test"};
+    HTML->Children.back()->AttrValues =
+        allocateArray<StringRef>(Values, TransientArena);
+  }
   HTML->Children.emplace_back(allocatePtr<CommentInfo>());
   HTML->Children.back()->Kind = CommentKind::CK_HTMLStartTagComment;
   HTML->Children.back()->Name = "li";

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to