https://github.com/Neil-N4 created 
https://github.com/llvm/llvm-project/pull/201746

Integrates the Markdown parsing library (added in #200302) into MDGenerator's 
writeDescription function. When a documentation comment paragraph contains only 
text children, the text is concatenated and passed to parseMarkdown(). If 
fenced code blocks are found, they are emitted as proper Markdown code fences. 
All other paragraphs fall back to the original plain-text path unchanged. This 
is the first vertical slice of Markdown integration. The parser branch 
continues separately. Assisted-by: Claude

Stacked on: #200302

cc @ilovepi @petrhosek @evelez7

>From 87ed388807b9239da05c1433ae253456f44fcf1f Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Thu, 28 May 2026 19:23:48 -0400
Subject: [PATCH 1/8] [clang-doc] Add standalone Markdown parsing library

---
 clang-tools-extra/clang-doc/CMakeLists.txt |   1 +
 clang-tools-extra/clang-doc/Markdown.cpp   | 133 +++++++++++++++++++++
 clang-tools-extra/clang-doc/Markdown.h     |  59 +++++++++
 3 files changed, 193 insertions(+)
 create mode 100644 clang-tools-extra/clang-doc/Markdown.cpp
 create mode 100644 clang-tools-extra/clang-doc/Markdown.h

diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt 
b/clang-tools-extra/clang-doc/CMakeLists.txt
index 22e2c8159e9f6..4f69385bdccc3 100644
--- a/clang-tools-extra/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/CMakeLists.txt
@@ -12,6 +12,7 @@ add_clang_library(clangDoc STATIC
   Generators.cpp
   HTMLGenerator.cpp
   Mapper.cpp
+  Markdown.cpp
   MDGenerator.cpp
   Representation.cpp
   Serialize.cpp
diff --git a/clang-tools-extra/clang-doc/Markdown.cpp 
b/clang-tools-extra/clang-doc/Markdown.cpp
new file mode 100644
index 0000000000000..87053c94b0566
--- /dev/null
+++ b/clang-tools-extra/clang-doc/Markdown.cpp
@@ -0,0 +1,133 @@
+//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Markdown.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+static MDNode makeText(llvm::StringRef S) {
+  return {NodeKind::Text, S, {}};
+}
+
+// A line is a table separator if it only contains |, -, :, and spaces,
+// and has at least one -.
+static bool isSepRow(llvm::StringRef Line) {
+  return llvm::all_of(Line, [](char C) {
+    return C == '|' || C == '-' || C == ':' || C == ' ';
+  }) && Line.contains('-');
+}
+
+static llvm::ArrayRef<MDNode>
+allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes,
+              llvm::BumpPtrAllocator &Arena) {
+  if (Nodes.empty())
+    return {};
+  MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size());
+  std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated);
+  return llvm::ArrayRef<MDNode>(Allocated, Nodes.size());
+}
+
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+                                     llvm::BumpPtrAllocator &Arena) {
+  if (ParagraphText.trim().empty())
+    return {};
+
+  llvm::SmallVector<llvm::StringRef, 16> Lines;
+  ParagraphText.split(Lines, '\n');
+
+  llvm::SmallVector<MDNode, 8> Nodes;
+  unsigned I = 0;
+
+  while (I < Lines.size()) {
+    llvm::StringRef Line = Lines[I].trim();
+
+    if (Line.empty()) {
+      ++I;
+      continue;
+    }
+
+    // Fenced code block: ``` or ~~~
+    if (Line.starts_with("```") || Line.starts_with("~~~")) {
+      char Fence = Line[0];
+      llvm::StringRef Lang = Line.drop_front(3).trim();
+      llvm::SmallVector<MDNode, 4> CodeLines;
+      ++I;
+      while (I < Lines.size()) {
+        llvm::StringRef CodeLine = Lines[I].trim();
+        if (CodeLine.size() >= 3 &&
+            llvm::all_of(CodeLine.take_front(3),
+                         [Fence](char C) { return C == Fence; }))
+          break;
+        CodeLines.push_back(makeText(Lines[I]));
+        ++I;
+      }
+      ++I; // skip closing fence
+      MDNode Code;
+      Code.Kind = NodeKind::FencedCode;
+      Code.Content = Lang;
+      Code.Children = allocateNodes(CodeLines, Arena);
+      Nodes.push_back(Code);
+      continue;
+    }
+
+    // Pipe table: current line has | and next line is a separator row
+    if (Line.contains('|') && I + 1 < Lines.size() &&
+        isSepRow(Lines[I + 1].trim())) {
+      llvm::SmallVector<MDNode, 4> Rows;
+      while (I < Lines.size() && Lines[I].trim().contains('|')) {
+        Rows.push_back(makeText(Lines[I].trim()));
+        ++I;
+      }
+      MDNode Table;
+      Table.Kind = NodeKind::Table;
+      Table.Content = {};
+      Table.Children = allocateNodes(Rows, Arena);
+      Nodes.push_back(Table);
+      continue;
+    }
+
+    // Unordered list item
+    if (Line.starts_with("- ") || Line.starts_with("* ") ||
+        Line.starts_with("+ ")) {
+      llvm::SmallVector<MDNode, 4> Items;
+      while (I < Lines.size()) {
+        llvm::StringRef L = Lines[I].trim();
+        if (!L.starts_with("- ") && !L.starts_with("* ") &&
+            !L.starts_with("+ "))
+          break;
+        MDNode Item;
+        Item.Kind = NodeKind::ListItem;
+        Item.Content = L.drop_front(2).trim();
+        Item.Children = {};
+        Items.push_back(Item);
+        ++I;
+      }
+      MDNode List;
+      List.Kind = NodeKind::UnorderedList;
+      List.Content = {};
+      List.Children = allocateNodes(Items, Arena);
+      Nodes.push_back(List);
+      continue;
+    }
+
+    // Plain text fallback
+    Nodes.push_back(makeText(Line));
+    ++I;
+  }
+
+  return allocateNodes(Nodes, Arena);
+}
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/Markdown.h 
b/clang-tools-extra/clang-doc/Markdown.h
new file mode 100644
index 0000000000000..c3374f06e2278
--- /dev/null
+++ b/clang-tools-extra/clang-doc/Markdown.h
@@ -0,0 +1,59 @@
+//===-- Markdown.h - Markdown Parser ----------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a standalone Markdown parsing library for the LLVM
+// ecosystem. The parser takes plain text and returns a tree of typed nodes
+// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+enum class NodeKind : uint8_t {
+  // Block nodes
+  Paragraph,
+  FencedCode,
+  Table,
+  UnorderedList,
+  OrderedList,
+  ListItem,
+  ThematicBreak,
+  // Inline nodes
+  Text,
+  InlineCode,
+  Emphasis,
+  Strong,
+  SoftBreak,
+};
+
+struct MDNode {
+  NodeKind Kind;
+  llvm::StringRef Content;       // lang tag for FencedCode, leaf text for Text
+  llvm::ArrayRef<MDNode> Children; // arena allocated
+};
+
+// Parses Markdown from a single comment paragraph's text.
+// Returns an empty ArrayRef if no Markdown constructs are found,
+// so generators can fall back to plain-text rendering at zero cost.
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+                                     llvm::BumpPtrAllocator &Arena);
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file

>From d350bf1e3fb4090bc65bcd7e5666e87c7b319b18 Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Thu, 28 May 2026 19:34:38 -0400
Subject: [PATCH 2/8] [clang-doc] Fix formatting

---
 clang-tools-extra/clang-doc/Markdown.cpp | 8 +++++---
 clang-tools-extra/clang-doc/Markdown.h   | 2 +-
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clang-doc/Markdown.cpp 
b/clang-tools-extra/clang-doc/Markdown.cpp
index 87053c94b0566..904d8e92dff17 100644
--- a/clang-tools-extra/clang-doc/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/Markdown.cpp
@@ -22,9 +22,11 @@ static MDNode makeText(llvm::StringRef S) {
 // A line is a table separator if it only contains |, -, :, and spaces,
 // and has at least one -.
 static bool isSepRow(llvm::StringRef Line) {
-  return llvm::all_of(Line, [](char C) {
-    return C == '|' || C == '-' || C == ':' || C == ' ';
-  }) && Line.contains('-');
+  return llvm::all_of(Line,
+                      [](char C) {
+                        return C == '|' || C == '-' || C == ':' || C == ' ';
+                      }) &&
+         Line.contains('-');
 }
 
 static llvm::ArrayRef<MDNode>
diff --git a/clang-tools-extra/clang-doc/Markdown.h 
b/clang-tools-extra/clang-doc/Markdown.h
index c3374f06e2278..bf4815e068b53 100644
--- a/clang-tools-extra/clang-doc/Markdown.h
+++ b/clang-tools-extra/clang-doc/Markdown.h
@@ -42,7 +42,7 @@ enum class NodeKind : uint8_t {
 
 struct MDNode {
   NodeKind Kind;
-  llvm::StringRef Content;       // lang tag for FencedCode, leaf text for Text
+  llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
   llvm::ArrayRef<MDNode> Children; // arena allocated
 };
 

>From 73a9197525f835ccd3ebee0fc89dad83d19bb0ad Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Thu, 28 May 2026 19:42:16 -0400
Subject: [PATCH 3/8] [clang-doc] Fix formatting

---
 clang-tools-extra/clang-doc/Markdown.cpp | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/clang-tools-extra/clang-doc/Markdown.cpp 
b/clang-tools-extra/clang-doc/Markdown.cpp
index 904d8e92dff17..17ee61369fb6b 100644
--- a/clang-tools-extra/clang-doc/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/Markdown.cpp
@@ -15,9 +15,7 @@ namespace clang {
 namespace doc {
 namespace markdown {
 
-static MDNode makeText(llvm::StringRef S) {
-  return {NodeKind::Text, S, {}};
-}
+static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
 
 // A line is a table separator if it only contains |, -, :, and spaces,
 // and has at least one -.

>From 1a899ed0122a7a15787ebe1760225b46289d50ba Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Fri, 29 May 2026 14:21:18 -0400
Subject: [PATCH 4/8] [clang-doc] Move Markdown library to support folder, fix
 headers and enum prefixes

---
 clang-tools-extra/clang-doc/CMakeLists.txt    |   1 -
 .../clang-doc/support/CMakeLists.txt          |   3 +-
 .../clang-doc/support/Markdown.cpp            | 133 ++++++++++++++++++
 .../clang-doc/support/Markdown.h              |  59 ++++++++
 4 files changed, 194 insertions(+), 2 deletions(-)
 create mode 100644 clang-tools-extra/clang-doc/support/Markdown.cpp
 create mode 100644 clang-tools-extra/clang-doc/support/Markdown.h

diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt 
b/clang-tools-extra/clang-doc/CMakeLists.txt
index 4f69385bdccc3..22e2c8159e9f6 100644
--- a/clang-tools-extra/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/CMakeLists.txt
@@ -12,7 +12,6 @@ add_clang_library(clangDoc STATIC
   Generators.cpp
   HTMLGenerator.cpp
   Mapper.cpp
-  Markdown.cpp
   MDGenerator.cpp
   Representation.cpp
   Serialize.cpp
diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt 
b/clang-tools-extra/clang-doc/support/CMakeLists.txt
index 8ac913ffbe998..acff865190ff9 100644
--- a/clang-tools-extra/clang-doc/support/CMakeLists.txt
+++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt
@@ -6,5 +6,6 @@ set(LLVM_LINK_COMPONENTS
 
 add_clang_library(clangDocSupport STATIC
   File.cpp
+  Markdown.cpp
   Utils.cpp
-  )
+  )
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp 
b/clang-tools-extra/clang-doc/support/Markdown.cpp
new file mode 100644
index 0000000000000..17ee61369fb6b
--- /dev/null
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -0,0 +1,133 @@
+//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Markdown.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
+
+// A line is a table separator if it only contains |, -, :, and spaces,
+// and has at least one -.
+static bool isSepRow(llvm::StringRef Line) {
+  return llvm::all_of(Line,
+                      [](char C) {
+                        return C == '|' || C == '-' || C == ':' || C == ' ';
+                      }) &&
+         Line.contains('-');
+}
+
+static llvm::ArrayRef<MDNode>
+allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes,
+              llvm::BumpPtrAllocator &Arena) {
+  if (Nodes.empty())
+    return {};
+  MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size());
+  std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated);
+  return llvm::ArrayRef<MDNode>(Allocated, Nodes.size());
+}
+
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+                                     llvm::BumpPtrAllocator &Arena) {
+  if (ParagraphText.trim().empty())
+    return {};
+
+  llvm::SmallVector<llvm::StringRef, 16> Lines;
+  ParagraphText.split(Lines, '\n');
+
+  llvm::SmallVector<MDNode, 8> Nodes;
+  unsigned I = 0;
+
+  while (I < Lines.size()) {
+    llvm::StringRef Line = Lines[I].trim();
+
+    if (Line.empty()) {
+      ++I;
+      continue;
+    }
+
+    // Fenced code block: ``` or ~~~
+    if (Line.starts_with("```") || Line.starts_with("~~~")) {
+      char Fence = Line[0];
+      llvm::StringRef Lang = Line.drop_front(3).trim();
+      llvm::SmallVector<MDNode, 4> CodeLines;
+      ++I;
+      while (I < Lines.size()) {
+        llvm::StringRef CodeLine = Lines[I].trim();
+        if (CodeLine.size() >= 3 &&
+            llvm::all_of(CodeLine.take_front(3),
+                         [Fence](char C) { return C == Fence; }))
+          break;
+        CodeLines.push_back(makeText(Lines[I]));
+        ++I;
+      }
+      ++I; // skip closing fence
+      MDNode Code;
+      Code.Kind = NodeKind::FencedCode;
+      Code.Content = Lang;
+      Code.Children = allocateNodes(CodeLines, Arena);
+      Nodes.push_back(Code);
+      continue;
+    }
+
+    // Pipe table: current line has | and next line is a separator row
+    if (Line.contains('|') && I + 1 < Lines.size() &&
+        isSepRow(Lines[I + 1].trim())) {
+      llvm::SmallVector<MDNode, 4> Rows;
+      while (I < Lines.size() && Lines[I].trim().contains('|')) {
+        Rows.push_back(makeText(Lines[I].trim()));
+        ++I;
+      }
+      MDNode Table;
+      Table.Kind = NodeKind::Table;
+      Table.Content = {};
+      Table.Children = allocateNodes(Rows, Arena);
+      Nodes.push_back(Table);
+      continue;
+    }
+
+    // Unordered list item
+    if (Line.starts_with("- ") || Line.starts_with("* ") ||
+        Line.starts_with("+ ")) {
+      llvm::SmallVector<MDNode, 4> Items;
+      while (I < Lines.size()) {
+        llvm::StringRef L = Lines[I].trim();
+        if (!L.starts_with("- ") && !L.starts_with("* ") &&
+            !L.starts_with("+ "))
+          break;
+        MDNode Item;
+        Item.Kind = NodeKind::ListItem;
+        Item.Content = L.drop_front(2).trim();
+        Item.Children = {};
+        Items.push_back(Item);
+        ++I;
+      }
+      MDNode List;
+      List.Kind = NodeKind::UnorderedList;
+      List.Content = {};
+      List.Children = allocateNodes(Items, Arena);
+      Nodes.push_back(List);
+      continue;
+    }
+
+    // Plain text fallback
+    Nodes.push_back(makeText(Line));
+    ++I;
+  }
+
+  return allocateNodes(Nodes, Arena);
+}
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
\ No newline at end of file
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h 
b/clang-tools-extra/clang-doc/support/Markdown.h
new file mode 100644
index 0000000000000..bf4815e068b53
--- /dev/null
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -0,0 +1,59 @@
+//===-- Markdown.h - Markdown Parser ----------------------------*- C++ 
-*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a standalone Markdown parsing library for the LLVM
+// ecosystem. The parser takes plain text and returns a tree of typed nodes
+// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+
+namespace clang {
+namespace doc {
+namespace markdown {
+
+enum class NodeKind : uint8_t {
+  // Block nodes
+  Paragraph,
+  FencedCode,
+  Table,
+  UnorderedList,
+  OrderedList,
+  ListItem,
+  ThematicBreak,
+  // Inline nodes
+  Text,
+  InlineCode,
+  Emphasis,
+  Strong,
+  SoftBreak,
+};
+
+struct MDNode {
+  NodeKind Kind;
+  llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text
+  llvm::ArrayRef<MDNode> Children; // arena allocated
+};
+
+// Parses Markdown from a single comment paragraph's text.
+// Returns an empty ArrayRef if no Markdown constructs are found,
+// so generators can fall back to plain-text rendering at zero cost.
+llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
+                                     llvm::BumpPtrAllocator &Arena);
+
+} // namespace markdown
+} // namespace doc
+} // namespace clang
+
+#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
\ No newline at end of file

>From 750b43aacf1705b707ae736e58deb1e55e5d169a Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Fri, 29 May 2026 14:27:09 -0400
Subject: [PATCH 5/8] [clang-doc] Fix enum prefixes and file headers

---
 .../clang-doc/support/Markdown.cpp            | 21 +++++----
 .../clang-doc/support/Markdown.h              | 45 ++++++++++---------
 2 files changed, 33 insertions(+), 33 deletions(-)

diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp 
b/clang-tools-extra/clang-doc/support/Markdown.cpp
index 17ee61369fb6b..bbce53fa17156 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.cpp
+++ b/clang-tools-extra/clang-doc/support/Markdown.cpp
@@ -1,4 +1,4 @@
-//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ 
-*-===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -15,16 +15,15 @@ namespace clang {
 namespace doc {
 namespace markdown {
 
-static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; }
+static MDNode makeText(llvm::StringRef S) {
+  return {NodeKind::NK_Text, S, {}};
+}
 
 // A line is a table separator if it only contains |, -, :, and spaces,
 // and has at least one -.
 static bool isSepRow(llvm::StringRef Line) {
-  return llvm::all_of(Line,
-                      [](char C) {
-                        return C == '|' || C == '-' || C == ':' || C == ' ';
-                      }) &&
-         Line.contains('-');
+  return Line.contains('-') &&
+         Line.find_first_not_of("|-: ") == llvm::StringRef::npos;
 }
 
 static llvm::ArrayRef<MDNode>
@@ -73,7 +72,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef 
ParagraphText,
       }
       ++I; // skip closing fence
       MDNode Code;
-      Code.Kind = NodeKind::FencedCode;
+      Code.Kind = NodeKind::NK_FencedCode;
       Code.Content = Lang;
       Code.Children = allocateNodes(CodeLines, Arena);
       Nodes.push_back(Code);
@@ -89,7 +88,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef 
ParagraphText,
         ++I;
       }
       MDNode Table;
-      Table.Kind = NodeKind::Table;
+      Table.Kind = NodeKind::NK_Table;
       Table.Content = {};
       Table.Children = allocateNodes(Rows, Arena);
       Nodes.push_back(Table);
@@ -106,14 +105,14 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef 
ParagraphText,
             !L.starts_with("+ "))
           break;
         MDNode Item;
-        Item.Kind = NodeKind::ListItem;
+        Item.Kind = NodeKind::NK_ListItem;
         Item.Content = L.drop_front(2).trim();
         Item.Children = {};
         Items.push_back(Item);
         ++I;
       }
       MDNode List;
-      List.Kind = NodeKind::UnorderedList;
+      List.Kind = NodeKind::NK_UnorderedList;
       List.Content = {};
       List.Children = allocateNodes(Items, Arena);
       Nodes.push_back(List);
diff --git a/clang-tools-extra/clang-doc/support/Markdown.h 
b/clang-tools-extra/clang-doc/support/Markdown.h
index bf4815e068b53..e665170473601 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.h
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -1,15 +1,16 @@
-//===-- Markdown.h - Markdown Parser ----------------------------*- C++ 
-*-===//
+//===----------------------------------------------------------------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 
//===----------------------------------------------------------------------===//
-//
-// This file defines a standalone Markdown parsing library for the LLVM
-// ecosystem. The parser takes plain text and returns a tree of typed nodes
-// with no knowledge of comments, Doxygen, or Clang-Doc internals.
-//
+///
+/// \file
+/// This file defines a standalone Markdown parsing library for the LLVM
+/// ecosystem. The parser takes plain text and returns a tree of typed nodes
+/// with no knowledge of comments, Doxygen, or Clang-Doc internals.
+///
 
//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H
@@ -23,21 +24,21 @@ namespace clang {
 namespace doc {
 namespace markdown {
 
-enum class NodeKind : uint8_t {
+enum class NodeKind {
   // Block nodes
-  Paragraph,
-  FencedCode,
-  Table,
-  UnorderedList,
-  OrderedList,
-  ListItem,
-  ThematicBreak,
+  NK_Paragraph,
+  NK_FencedCode,
+  NK_Table,
+  NK_UnorderedList,
+  NK_OrderedList,
+  NK_ListItem,
+  NK_ThematicBreak,
   // Inline nodes
-  Text,
-  InlineCode,
-  Emphasis,
-  Strong,
-  SoftBreak,
+  NK_Text,
+  NK_InlineCode,
+  NK_Emphasis,
+  NK_Strong,
+  NK_SoftBreak,
 };
 
 struct MDNode {
@@ -46,9 +47,9 @@ struct MDNode {
   llvm::ArrayRef<MDNode> Children; // arena allocated
 };
 
-// Parses Markdown from a single comment paragraph's text.
-// Returns an empty ArrayRef if no Markdown constructs are found,
-// so generators can fall back to plain-text rendering at zero cost.
+/// Parses Markdown from a single comment paragraph's text.
+/// Returns an empty ArrayRef only for empty or whitespace-only input; lines
+/// with no recognized Markdown construct are returned as NK_Text nodes.
 llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText,
                                      llvm::BumpPtrAllocator &Arena);
 

>From 434e6328c40d4f10ab70b5a4e28ca70bc1e7edd5 Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Fri, 29 May 2026 16:17:59 -0400
Subject: [PATCH 6/8] [clang-doc] Add unit tests for Markdown parser

---
 .../unittests/clang-doc/CMakeLists.txt        |  4 +-
 .../clang-doc/MarkdownParserTest.cpp          | 94 +++++++++++++++++++
 2 files changed, 97 insertions(+), 1 deletion(-)
 create mode 100644 clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp

diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt 
b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
index 01b34ec9a791e..b74207ac88fa7 100644
--- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
+++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt
@@ -26,6 +26,7 @@ add_extra_unittest(ClangDocTests
   ClangDocTest.cpp
   GeneratorTest.cpp
   HTMLGeneratorTest.cpp
+  MarkdownParserTest.cpp
   MDGeneratorTest.cpp
   MergeTest.cpp
   SerializeTest.cpp
@@ -49,5 +50,6 @@ clang_target_link_libraries(ClangDocTests
 target_link_libraries(ClangDocTests
   PRIVATE
   clangDoc
+  clangDocSupport
   LLVMTestingSupport
-  )
+  )
\ No newline at end of file
diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp 
b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
new file mode 100644
index 0000000000000..8df5efc7f1d5f
--- /dev/null
+++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp
@@ -0,0 +1,94 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM 
Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "support/Markdown.h"
+#include "llvm/Support/Allocator.h"
+#include "gtest/gtest.h"
+
+using namespace clang::doc::markdown;
+
+namespace {
+
+TEST(MarkdownParserTest, EmptyInput) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("", Arena);
+  EXPECT_TRUE(Nodes.empty());
+}
+
+TEST(MarkdownParserTest, WhitespaceOnlyInput) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("   \n  \n", Arena);
+  EXPECT_TRUE(Nodes.empty());
+}
+
+TEST(MarkdownParserTest, PlainText) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("hello world", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text);
+  EXPECT_EQ(Nodes[0].Content, "hello world");
+}
+
+TEST(MarkdownParserTest, FencedCodeBlock) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_EQ(Nodes[0].Content, "cpp");
+  ASSERT_EQ(Nodes[0].Children.size(), 1u);
+}
+
+TEST(MarkdownParserTest, FencedCodeBlockNoLang) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```\nsome code\n```", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+  EXPECT_TRUE(Nodes[0].Content.empty());
+}
+
+TEST(MarkdownParserTest, UnterminatedFenceStillProducesCodeNode) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena);
+  // An unterminated fence must not crash; its lines still form a code node.
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode);
+}
+
+TEST(MarkdownParserTest, PipeTable) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table);
+}
+
+TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("a | b\nc | d", Arena);
+  // No separator row so should not be parsed as a table
+  for (const auto &Node : Nodes)
+    EXPECT_NE(Node.Kind, NodeKind::NK_Table);
+}
+
+TEST(MarkdownParserTest, UnorderedList) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena);
+  ASSERT_EQ(Nodes.size(), 1u);
+  EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList);
+  ASSERT_EQ(Nodes[0].Children.size(), 3u);
+  EXPECT_EQ(Nodes[0].Children[0].Content, "foo");
+  EXPECT_EQ(Nodes[0].Children[1].Content, "bar");
+  EXPECT_EQ(Nodes[0].Children[2].Content, "baz");
+}
+
+TEST(MarkdownParserTest, MixedContent) {
+  llvm::BumpPtrAllocator Arena;
+  auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena);
+  EXPECT_EQ(Nodes.size(), 3u);
+}
+
+} // namespace
\ No newline at end of file

>From fb907ac64844b5aa7c0679e32884438dc454949a Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Fri, 29 May 2026 16:21:43 -0400
Subject: [PATCH 7/8] [clang-doc] Add design documentation to Markdown.h

---
 clang-tools-extra/clang-doc/support/Markdown.h | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/clang-tools-extra/clang-doc/support/Markdown.h 
b/clang-tools-extra/clang-doc/support/Markdown.h
index e665170473601..0ae33e33e7eba 100644
--- a/clang-tools-extra/clang-doc/support/Markdown.h
+++ b/clang-tools-extra/clang-doc/support/Markdown.h
@@ -11,6 +11,22 @@
 /// ecosystem. The parser takes plain text and returns a tree of typed nodes
 /// with no knowledge of comments, Doxygen, or Clang-Doc internals.
 ///
+/// This is a simple Markdown parser for use inside Clang-Doc's comment
+/// pipeline. You give it a paragraph of text and an arena allocator, and it
+/// gives back a list of typed nodes describing the Markdown structure it 
found.
+///
+/// The main entry point is parseMarkdown(). Empty or whitespace-only text
+/// gives back an empty list; anything else gives a tree of MDNode structs
+/// (plain lines become text nodes) where each node has a kind, optional
+/// content (like the language tag on a code fence), and optional children.
+///
+/// All nodes are allocated in the arena you pass in. You own the arena and are
+/// responsible for keeping it alive as long as you use the nodes.
+///
+/// The parser handles fenced code blocks, pipe tables, and unordered lists.
+/// Anything it does not recognize comes back as a plain text node. It will
+/// never crash on bad input.
+///
 
//===----------------------------------------------------------------------===//
 
 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H

>From 5ea6a4ee7b6f41862fbc78b4a348baae32b597d9 Mon Sep 17 00:00:00 2001
From: Neil-N4 <[email protected]>
Date: Fri, 5 Jun 2026 00:50:59 -0400
Subject: [PATCH 8/8] [clang-doc] Integrate Markdown parser into MDGenerator
 for fenced code blocks

---
 clang-tools-extra/clang-doc/MDGenerator.cpp | 87 ++++++++++++++++++++-
 1 file changed, 83 insertions(+), 4 deletions(-)

diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp 
b/clang-tools-extra/clang-doc/MDGenerator.cpp
index df1ca6b868d43..0a01ec5aeba2d 100644
--- a/clang-tools-extra/clang-doc/MDGenerator.cpp
+++ b/clang-tools-extra/clang-doc/MDGenerator.cpp
@@ -8,14 +8,19 @@
 
 #include "Generators.h"
 #include "Representation.h"
+#include "support/Markdown.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/Path.h"
 #include "llvm/Support/raw_ostream.h"
 #include <string>
 
+#define DEBUG_TYPE "clang-doc-md-generator"
+
 using namespace llvm;
 
 namespace clang {
@@ -54,12 +59,10 @@ static void writeHeader(const Twine &Text, unsigned int Num, raw_ostream &OS) {
 
 static void writeSourceFileRef(const ClangDocContext &CDCtx, const Location &L,
                                raw_ostream &OS) {
-
   if (!CDCtx.RepositoryUrl) {
     OS << "*Defined at " << L.Filename << "#"
        << std::to_string(L.StartLineNumber) << "*";
   } else {
-
     OS << formatv("*Defined at [#{0}{1}{2}](#{0}{1}{3})*",
                   CDCtx.RepositoryLinePrefix.value_or(""), L.StartLineNumber,
                   L.Filename, *CDCtx.RepositoryUrl);
@@ -148,6 +151,40 @@ static void maybeWriteSourceFileRef(llvm::raw_ostream &OS,
     writeSourceFileRef(CDCtx, *DefLoc, OS);
 }
 
+/// Writes one parsed Markdown node, \p Node, to \p OS.
+///
+/// NK_FencedCode becomes a triple-backtick fence: Content follows the opening
+/// fence and each child's Content is one body line. NK_Text writes Content
+/// verbatim. Any other kind writes Content, then recurses into Children.
+static void writeMDNode(const markdown::MDNode &Node, raw_ostream &OS) {
+  switch (Node.Kind) {
+  case markdown::NodeKind::NK_FencedCode:
+    // Content (the language tag, if any) goes directly after the opening
+    // fence; the surrounding newlines set the block apart from other text.
+    OS << "\n```";
+    if (!Node.Content.empty())
+      OS << Node.Content;
+    OS << "\n";
+    for (const auto &Line : Node.Children)
+      OS << Line.Content << "\n";
+    OS << "```\n\n";
+    break;
+
+  case markdown::NodeKind::NK_Text:
+    OS << Node.Content;
+    break;
+
+  default:
+    // No dedicated rendering for this kind yet: write Content, then each
+    // child in order, so that no parsed text is dropped from the output.
+    if (!Node.Content.empty())
+      OS << Node.Content;
+    for (const auto &Child : Node.Children)
+      writeMDNode(Child, OS);
+    break;
+  }
+}
+
 static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
   switch (I.Kind) {
   case CommentKind::CK_FullComment:
@@ -155,11 +192,53 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) {
       writeDescription(Child, OS);
     break;
 
-  case CommentKind::CK_ParagraphComment:
+  case CommentKind::CK_ParagraphComment: {
+    // Clang's comment parser represents each line of a documentation
+    // paragraph as a separate CK_TextComment child. To parse Markdown
+    // constructs that span multiple lines (e.g. fenced code blocks),
+    // we concatenate all text-only children and attempt to parse the
+    // result as Markdown before falling back to plain-text emission.
+    //
+    // If the paragraph contains non-text children (inline commands,
+    // HTML tags, etc.) we skip Markdown parsing entirely and fall back
+    // to the original recursive approach so existing behaviour is
+    // preserved.
+    bool AllTextChildren = true;
+    for (const auto &Child : I.Children)
+      if (Child.Kind != CommentKind::CK_TextComment) {
+        AllTextChildren = false;
+        break;
+      }
+
+    if (AllTextChildren && !I.Children.empty()) {
+      std::string ParagraphText;
+      llvm::raw_string_ostream TextOS(ParagraphText);
+      for (const auto &Child : I.Children)
+        if (!Child.Text.empty())
+          TextOS << Child.Text << "\n";
+
+      // The allocator is scoped to this paragraph; nodes must not outlive it.
+      llvm::BumpPtrAllocator Arena;
+      auto Nodes = markdown::parseMarkdown(ParagraphText, Arena);
+
+      LLVM_DEBUG(llvm::dbgs()
+                 << "[clang-doc] paragraph -> " << Nodes.size()
+                 << " Markdown node(s)\n");
+
+      if (!Nodes.empty()) {
+        for (const auto &Node : Nodes)
+          writeMDNode(Node, OS);
+        writeNewLine(OS);
+        break;
+      }
+    }
+
+    // Fall back: emit children recursively (original behaviour).
     for (const auto &Child : I.Children)
       writeDescription(Child, OS);
     writeNewLine(OS);
     break;
+  }
 
   case CommentKind::CK_BlockCommandComment:
     OS << genEmphasis(I.Name) << " ";
@@ -608,4 +687,4 @@ static GeneratorRegistry::Add<MDGenerator> MD(MDGenerator::Format,
 volatile int MDGeneratorAnchorSource = 0;
 
 } // namespace doc
-} // namespace clang
+} // namespace clang
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to