https://github.com/Neil-N4 updated https://github.com/llvm/llvm-project/pull/201746
>From 87ed388807b9239da05c1433ae253456f44fcf1f Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Thu, 28 May 2026 19:23:48 -0400 Subject: [PATCH 01/12] [clang-doc] Add standalone Markdown parsing library --- clang-tools-extra/clang-doc/CMakeLists.txt | 1 + clang-tools-extra/clang-doc/Markdown.cpp | 133 +++++++++++++++++++++ clang-tools-extra/clang-doc/Markdown.h | 59 +++++++++ 3 files changed, 193 insertions(+) create mode 100644 clang-tools-extra/clang-doc/Markdown.cpp create mode 100644 clang-tools-extra/clang-doc/Markdown.h diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt b/clang-tools-extra/clang-doc/CMakeLists.txt index 22e2c8159e9f6..4f69385bdccc3 100644 --- a/clang-tools-extra/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/CMakeLists.txt @@ -12,6 +12,7 @@ add_clang_library(clangDoc STATIC Generators.cpp HTMLGenerator.cpp Mapper.cpp + Markdown.cpp MDGenerator.cpp Representation.cpp Serialize.cpp diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp new file mode 100644 index 0000000000000..87053c94b0566 --- /dev/null +++ b/clang-tools-extra/clang-doc/Markdown.cpp @@ -0,0 +1,133 @@ +//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace doc { +namespace markdown { + +static MDNode makeText(llvm::StringRef S) { + return {NodeKind::Text, S, {}}; +} + +// A line is a table separator if it only contains |, -, :, and spaces, +// and has at least one -. +static bool isSepRow(llvm::StringRef Line) { + return llvm::all_of(Line, [](char C) { + return C == '|' || C == '-' || C == ':' || C == ' '; + }) && Line.contains('-'); +} + +static llvm::ArrayRef<MDNode> +allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes, + llvm::BumpPtrAllocator &Arena) { + if (Nodes.empty()) + return {}; + MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size()); + std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated); + return llvm::ArrayRef<MDNode>(Allocated, Nodes.size()); +} + +llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, + llvm::BumpPtrAllocator &Arena) { + if (ParagraphText.trim().empty()) + return {}; + + llvm::SmallVector<llvm::StringRef, 16> Lines; + ParagraphText.split(Lines, '\n'); + + llvm::SmallVector<MDNode, 8> Nodes; + unsigned I = 0; + + while (I < Lines.size()) { + llvm::StringRef Line = Lines[I].trim(); + + if (Line.empty()) { + ++I; + continue; + } + + // Fenced code block: ``` or ~~~ + if (Line.starts_with("```") || Line.starts_with("~~~")) { + char Fence = Line[0]; + llvm::StringRef Lang = Line.drop_front(3).trim(); + llvm::SmallVector<MDNode, 4> CodeLines; + ++I; + while (I < Lines.size()) { + llvm::StringRef CodeLine = Lines[I].trim(); + if (CodeLine.size() >= 3 && + llvm::all_of(CodeLine.take_front(3), + [Fence](char C) { return C == Fence; })) + break; + CodeLines.push_back(makeText(Lines[I])); + ++I; + } + ++I; // skip closing fence + MDNode Code; + Code.Kind = NodeKind::FencedCode; + Code.Content = Lang; + Code.Children = allocateNodes(CodeLines, Arena); + Nodes.push_back(Code); + continue; + } + + // Pipe table: current line has | and next line is a separator row + if (Line.contains('|') && I + 1 < Lines.size() && + isSepRow(Lines[I + 1].trim())) { + llvm::SmallVector<MDNode, 4> Rows; + while (I < Lines.size() && Lines[I].trim().contains('|')) { + Rows.push_back(makeText(Lines[I].trim())); + ++I; + } + MDNode Table; + Table.Kind = NodeKind::Table; + Table.Content = {}; + Table.Children = allocateNodes(Rows, Arena); + Nodes.push_back(Table); + continue; + } + + // Unordered list item + if (Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ ")) { + llvm::SmallVector<MDNode, 4> Items; + while (I < Lines.size()) { + llvm::StringRef L = Lines[I].trim(); + if (!L.starts_with("- ") && !L.starts_with("* ") && + !L.starts_with("+ ")) + break; + MDNode Item; + Item.Kind = NodeKind::ListItem; + Item.Content = L.drop_front(2).trim(); + Item.Children = {}; + Items.push_back(Item); + ++I; + } + MDNode List; + List.Kind = NodeKind::UnorderedList; + List.Content = {}; + List.Children = allocateNodes(Items, Arena); + Nodes.push_back(List); + continue; + } + + // Plain text fallback + Nodes.push_back(makeText(Line)); + ++I; + } + + return allocateNodes(Nodes, Arena); +} + +} // namespace markdown +} // namespace doc +} // namespace clang \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/Markdown.h b/clang-tools-extra/clang-doc/Markdown.h new file mode 100644 index 0000000000000..c3374f06e2278 --- /dev/null +++ b/clang-tools-extra/clang-doc/Markdown.h @@ -0,0 +1,59 @@ +//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a standalone Markdown parsing library for the LLVM +// ecosystem. The parser takes plain text and returns a tree of typed nodes +// with no knowledge of comments, Doxygen, or Clang-Doc internals. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace doc { +namespace markdown { + +enum class NodeKind : uint8_t { + // Block nodes + Paragraph, + FencedCode, + Table, + UnorderedList, + OrderedList, + ListItem, + ThematicBreak, + // Inline nodes + Text, + InlineCode, + Emphasis, + Strong, + SoftBreak, +}; + +struct MDNode { + NodeKind Kind; + llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text + llvm::ArrayRef<MDNode> Children; // arena allocated +}; + +// Parses Markdown from a single comment paragraph's text. +// Returns an empty ArrayRef if no Markdown constructs are found, +// so generators can fall back to plain-text rendering at zero cost. +llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, + llvm::BumpPtrAllocator &Arena); + +} // namespace markdown +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H \ No newline at end of file >From d350bf1e3fb4090bc65bcd7e5666e87c7b319b18 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Thu, 28 May 2026 19:34:38 -0400 Subject: [PATCH 02/12] [clang-doc] Fix formatting --- clang-tools-extra/clang-doc/Markdown.cpp | 8 +++++--- clang-tools-extra/clang-doc/Markdown.h | 2 +- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp index 87053c94b0566..904d8e92dff17 100644 --- a/clang-tools-extra/clang-doc/Markdown.cpp +++ b/clang-tools-extra/clang-doc/Markdown.cpp @@ -22,9 +22,11 @@ static MDNode makeText(llvm::StringRef S) { // A line is a table separator if it only contains |, -, :, and spaces, // and has at least one -. static bool isSepRow(llvm::StringRef Line) { - return llvm::all_of(Line, [](char C) { - return C == '|' || C == '-' || C == ':' || C == ' '; - }) && Line.contains('-'); + return llvm::all_of(Line, + [](char C) { + return C == '|' || C == '-' || C == ':' || C == ' '; + }) && + Line.contains('-'); } static llvm::ArrayRef<MDNode> diff --git a/clang-tools-extra/clang-doc/Markdown.h b/clang-tools-extra/clang-doc/Markdown.h index c3374f06e2278..bf4815e068b53 100644 --- a/clang-tools-extra/clang-doc/Markdown.h +++ b/clang-tools-extra/clang-doc/Markdown.h @@ -42,7 +42,7 @@ enum class NodeKind : uint8_t { struct MDNode { NodeKind Kind; - llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text + llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text llvm::ArrayRef<MDNode> Children; // arena allocated }; >From 73a9197525f835ccd3ebee0fc89dad83d19bb0ad Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Thu, 28 May 2026 19:42:16 -0400 Subject: [PATCH 03/12] [clang-doc] Fix formatting --- clang-tools-extra/clang-doc/Markdown.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp index 904d8e92dff17..17ee61369fb6b 100644 --- a/clang-tools-extra/clang-doc/Markdown.cpp +++ b/clang-tools-extra/clang-doc/Markdown.cpp @@ -15,9 +15,7 @@ namespace clang { namespace doc { namespace markdown { -static MDNode makeText(llvm::StringRef S) { - return {NodeKind::Text, S, {}}; -} +static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; } // A line is a table separator if it only contains |, -, :, and spaces, // and has at least one -. >From 1a899ed0122a7a15787ebe1760225b46289d50ba Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 29 May 2026 14:21:18 -0400 Subject: [PATCH 04/12] [clang-doc] Move Markdown library to support folder, fix headers and enum prefixes --- clang-tools-extra/clang-doc/CMakeLists.txt | 1 - .../clang-doc/support/CMakeLists.txt | 3 +- .../clang-doc/support/Markdown.cpp | 133 ++++++++++++++++++ .../clang-doc/support/Markdown.h | 59 ++++++++ 4 files changed, 194 insertions(+), 2 deletions(-) create mode 100644 clang-tools-extra/clang-doc/support/Markdown.cpp create mode 100644 clang-tools-extra/clang-doc/support/Markdown.h diff --git a/clang-tools-extra/clang-doc/CMakeLists.txt b/clang-tools-extra/clang-doc/CMakeLists.txt index 4f69385bdccc3..22e2c8159e9f6 100644 --- a/clang-tools-extra/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/CMakeLists.txt @@ -12,7 +12,6 @@ add_clang_library(clangDoc STATIC Generators.cpp HTMLGenerator.cpp Mapper.cpp - Markdown.cpp MDGenerator.cpp Representation.cpp Serialize.cpp diff --git a/clang-tools-extra/clang-doc/support/CMakeLists.txt b/clang-tools-extra/clang-doc/support/CMakeLists.txt index 8ac913ffbe998..acff865190ff9 100644 --- a/clang-tools-extra/clang-doc/support/CMakeLists.txt +++ b/clang-tools-extra/clang-doc/support/CMakeLists.txt @@ -6,5 +6,6 @@ set(LLVM_LINK_COMPONENTS add_clang_library(clangDocSupport STATIC File.cpp + Markdown.cpp Utils.cpp - ) + ) \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp new file mode 100644 index 0000000000000..17ee61369fb6b --- /dev/null +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -0,0 +1,133 @@ +//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Markdown.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace doc { +namespace markdown { + +static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; } + +// A line is a table separator if it only contains |, -, :, and spaces, +// and has at least one -. +static bool isSepRow(llvm::StringRef Line) { + return llvm::all_of(Line, + [](char C) { + return C == '|' || C == '-' || C == ':' || C == ' '; + }) && + Line.contains('-'); +} + +static llvm::ArrayRef<MDNode> +allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes, + llvm::BumpPtrAllocator &Arena) { + if (Nodes.empty()) + return {}; + MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size()); + std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated); + return llvm::ArrayRef<MDNode>(Allocated, Nodes.size()); +} + +llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, + llvm::BumpPtrAllocator &Arena) { + if (ParagraphText.trim().empty()) + return {}; + + llvm::SmallVector<llvm::StringRef, 16> Lines; + ParagraphText.split(Lines, '\n'); + + llvm::SmallVector<MDNode, 8> Nodes; + unsigned I = 0; + + while (I < Lines.size()) { + llvm::StringRef Line = Lines[I].trim(); + + if (Line.empty()) { + ++I; + continue; + } + + // Fenced code block: ``` or ~~~ + if (Line.starts_with("```") || Line.starts_with("~~~")) { + char Fence = Line[0]; + llvm::StringRef Lang = Line.drop_front(3).trim(); + llvm::SmallVector<MDNode, 4> CodeLines; + ++I; + while (I < Lines.size()) { + llvm::StringRef CodeLine = Lines[I].trim(); + if (CodeLine.size() >= 3 && + llvm::all_of(CodeLine.take_front(3), + [Fence](char C) { return C == Fence; })) + break; + CodeLines.push_back(makeText(Lines[I])); + ++I; + } + ++I; // skip closing fence + MDNode Code; + Code.Kind = NodeKind::FencedCode; + Code.Content = Lang; + Code.Children = allocateNodes(CodeLines, Arena); + Nodes.push_back(Code); + continue; + } + + // Pipe table: current line has | and next line is a separator row + if (Line.contains('|') && I + 1 < Lines.size() && + isSepRow(Lines[I + 1].trim())) { + llvm::SmallVector<MDNode, 4> Rows; + while (I < Lines.size() && Lines[I].trim().contains('|')) { + Rows.push_back(makeText(Lines[I].trim())); + ++I; + } + MDNode Table; + Table.Kind = NodeKind::Table; + Table.Content = {}; + Table.Children = allocateNodes(Rows, Arena); + Nodes.push_back(Table); + continue; + } + + // Unordered list item + if (Line.starts_with("- ") || Line.starts_with("* ") || + Line.starts_with("+ ")) { + llvm::SmallVector<MDNode, 4> Items; + while (I < Lines.size()) { + llvm::StringRef L = Lines[I].trim(); + if (!L.starts_with("- ") && !L.starts_with("* ") && + !L.starts_with("+ ")) + break; + MDNode Item; + Item.Kind = NodeKind::ListItem; + Item.Content = L.drop_front(2).trim(); + Item.Children = {}; + Items.push_back(Item); + ++I; + } + MDNode List; + List.Kind = NodeKind::UnorderedList; + List.Content = {}; + List.Children = allocateNodes(Items, Arena); + Nodes.push_back(List); + continue; + } + + // Plain text fallback + Nodes.push_back(makeText(Line)); + ++I; + } + + return allocateNodes(Nodes, Arena); +} + +} // namespace markdown +} // namespace doc +} // namespace clang \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h new file mode 100644 index 0000000000000..bf4815e068b53 --- /dev/null +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -0,0 +1,59 @@ +//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a standalone Markdown parsing library for the LLVM +// ecosystem. The parser takes plain text and returns a tree of typed nodes +// with no knowledge of comments, Doxygen, or Clang-Doc internals. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H +#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" + +namespace clang { +namespace doc { +namespace markdown { + +enum class NodeKind : uint8_t { + // Block nodes + Paragraph, + FencedCode, + Table, + UnorderedList, + OrderedList, + ListItem, + ThematicBreak, + // Inline nodes + Text, + InlineCode, + Emphasis, + Strong, + SoftBreak, +}; + +struct MDNode { + NodeKind Kind; + llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text + llvm::ArrayRef<MDNode> Children; // arena allocated +}; + +// Parses Markdown from a single comment paragraph's text. +// Returns an empty ArrayRef if no Markdown constructs are found, +// so generators can fall back to plain-text rendering at zero cost. +llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, + llvm::BumpPtrAllocator &Arena); + +} // namespace markdown +} // namespace doc +} // namespace clang + +#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H \ No newline at end of file >From 750b43aacf1705b707ae736e58deb1e55e5d169a Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 29 May 2026 14:27:09 -0400 Subject: [PATCH 05/12] [clang-doc] Fix enum prefixes and file headers --- .../clang-doc/support/Markdown.cpp | 21 +++++---- .../clang-doc/support/Markdown.h | 45 ++++++++++--------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index 17ee61369fb6b..bbce53fa17156 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -1,4 +1,4 @@ -//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -15,16 +15,15 @@ namespace clang { namespace doc { namespace markdown { -static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; } +static MDNode makeText(llvm::StringRef S) { + return {NodeKind::NK_Text, S, {}}; +} // A line is a table separator if it only contains |, -, :, and spaces, // and has at least one -. static bool isSepRow(llvm::StringRef Line) { - return llvm::all_of(Line, - [](char C) { - return C == '|' || C == '-' || C == ':' || C == ' '; - }) && - Line.contains('-'); + return Line.contains('-') && + Line.find_first_not_of("|-: ") == llvm::StringRef::npos; } static llvm::ArrayRef<MDNode> @@ -73,7 +72,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, } ++I; // skip closing fence MDNode Code; - Code.Kind = NodeKind::FencedCode; + Code.Kind = NodeKind::NK_FencedCode; Code.Content = Lang; Code.Children = allocateNodes(CodeLines, Arena); Nodes.push_back(Code); @@ -89,7 +88,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, ++I; } MDNode Table; - Table.Kind = NodeKind::Table; + Table.Kind = NodeKind::NK_Table; Table.Content = {}; Table.Children = allocateNodes(Rows, Arena); Nodes.push_back(Table); @@ -106,14 +105,14 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, !L.starts_with("+ ")) break; MDNode Item; - Item.Kind = NodeKind::ListItem; + Item.Kind = NodeKind::NK_ListItem; Item.Content = L.drop_front(2).trim(); Item.Children = {}; Items.push_back(Item); ++I; } MDNode List; - List.Kind = NodeKind::UnorderedList; + List.Kind = NodeKind::NK_UnorderedList; List.Content = {}; List.Children = allocateNodes(Items, Arena); Nodes.push_back(List); diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index bf4815e068b53..e665170473601 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -1,15 +1,16 @@ -//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===// +//===----------------------------------------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -// -// This file defines a standalone Markdown parsing library for the LLVM -// ecosystem. The parser takes plain text and returns a tree of typed nodes -// with no knowledge of comments, Doxygen, or Clang-Doc internals. -// +/// +/// \file +/// This file defines a standalone Markdown parsing library for the LLVM +/// ecosystem. The parser takes plain text and returns a tree of typed nodes +/// with no knowledge of comments, Doxygen, or Clang-Doc internals. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H @@ -23,21 +24,21 @@ namespace clang { namespace doc { namespace markdown { -enum class NodeKind : uint8_t { +enum class NodeKind { // Block nodes - Paragraph, - FencedCode, - Table, - UnorderedList, - OrderedList, - ListItem, - ThematicBreak, + NK_Paragraph, + NK_FencedCode, + NK_Table, + NK_UnorderedList, + NK_OrderedList, + NK_ListItem, + NK_ThematicBreak, // Inline nodes - Text, - InlineCode, - Emphasis, - Strong, - SoftBreak, + NK_Text, + NK_InlineCode, + NK_Emphasis, + NK_Strong, + NK_SoftBreak, }; struct MDNode { @@ -46,9 +47,9 @@ struct MDNode { llvm::ArrayRef<MDNode> Children; // arena allocated }; -// Parses Markdown from a single comment paragraph's text. -// Returns an empty ArrayRef if no Markdown constructs are found, -// so generators can fall back to plain-text rendering at zero cost. +/// Parses Markdown from a single comment paragraph's text. +/// Returns an empty ArrayRef if no Markdown constructs are found, +/// so generators can fall back to plain-text rendering at zero cost. llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, llvm::BumpPtrAllocator &Arena); >From 434e6328c40d4f10ab70b5a4e28ca70bc1e7edd5 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 29 May 2026 16:17:59 -0400 Subject: [PATCH 06/12] [clang-doc] Add unit tests for Markdown parser --- .../unittests/clang-doc/CMakeLists.txt | 4 +- .../clang-doc/MarkdownParserTest.cpp | 94 +++++++++++++++++++ 2 files changed, 97 insertions(+), 1 deletion(-) create mode 100644 clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp diff --git a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt index 01b34ec9a791e..b74207ac88fa7 100644 --- a/clang-tools-extra/unittests/clang-doc/CMakeLists.txt +++ b/clang-tools-extra/unittests/clang-doc/CMakeLists.txt @@ -26,6 +26,7 @@ add_extra_unittest(ClangDocTests ClangDocTest.cpp GeneratorTest.cpp HTMLGeneratorTest.cpp + MarkdownParserTest.cpp MDGeneratorTest.cpp MergeTest.cpp SerializeTest.cpp @@ -49,5 +50,6 @@ clang_target_link_libraries(ClangDocTests target_link_libraries(ClangDocTests PRIVATE clangDoc + clangDocSupport LLVMTestingSupport - ) + ) \ No newline at end of file diff --git a/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp new file mode 100644 index 0000000000000..8df5efc7f1d5f --- /dev/null +++ b/clang-tools-extra/unittests/clang-doc/MarkdownParserTest.cpp @@ -0,0 +1,94 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "support/Markdown.h" +#include "llvm/Support/Allocator.h" +#include "gtest/gtest.h" + +using namespace clang::doc::markdown; + +namespace { + +TEST(MarkdownParserTest, EmptyInput) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("", Arena); + EXPECT_TRUE(Nodes.empty()); +} + +TEST(MarkdownParserTest, WhitespaceOnlyInput) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown(" \n \n", Arena); + EXPECT_TRUE(Nodes.empty()); +} + +TEST(MarkdownParserTest, PlainText) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("hello world", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Text); + EXPECT_EQ(Nodes[0].Content, "hello world"); +} + +TEST(MarkdownParserTest, FencedCodeBlock) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("```cpp\nint x = 0;\n```", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode); + EXPECT_EQ(Nodes[0].Content, "cpp"); + ASSERT_EQ(Nodes[0].Children.size(), 1u); +} + +TEST(MarkdownParserTest, FencedCodeBlockNoLang) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("```\nsome code\n```", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_FencedCode); + EXPECT_TRUE(Nodes[0].Content.empty()); +} + +TEST(MarkdownParserTest, UnterminatedFenceReturnsEmpty) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("```cpp\nint x = 0;", Arena); + // Unterminated fence should not crash and should produce a code node + // with whatever lines were found. + EXPECT_FALSE(Nodes.empty()); +} + +TEST(MarkdownParserTest, PipeTable) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("| A | B |\n|---|---|\n| 1 | 2 |", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_Table); +} + +TEST(MarkdownParserTest, PipeCharacterWithoutSepRowIsPlainText) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("a | b\nc | d", Arena); + // No separator row so should not be parsed as a table + for (const auto &Node : Nodes) + EXPECT_NE(Node.Kind, NodeKind::NK_Table); +} + +TEST(MarkdownParserTest, UnorderedList) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("- foo\n- bar\n- baz", Arena); + ASSERT_EQ(Nodes.size(), 1u); + EXPECT_EQ(Nodes[0].Kind, NodeKind::NK_UnorderedList); + ASSERT_EQ(Nodes[0].Children.size(), 3u); + EXPECT_EQ(Nodes[0].Children[0].Content, "foo"); + EXPECT_EQ(Nodes[0].Children[1].Content, "bar"); + EXPECT_EQ(Nodes[0].Children[2].Content, "baz"); +} + +TEST(MarkdownParserTest, MixedContent) { + llvm::BumpPtrAllocator Arena; + auto Nodes = parseMarkdown("some text\n```\ncode\n```\n- item", Arena); + EXPECT_EQ(Nodes.size(), 3u); +} + +} // namespace \ No newline at end of file >From fb907ac64844b5aa7c0679e32884438dc454949a Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 29 May 2026 16:21:43 -0400 Subject: [PATCH 07/12] [clang-doc] Add design documentation to Markdown.h --- clang-tools-extra/clang-doc/support/Markdown.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/clang-tools-extra/clang-doc/support/Markdown.h b/clang-tools-extra/clang-doc/support/Markdown.h index e665170473601..0ae33e33e7eba 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.h +++ b/clang-tools-extra/clang-doc/support/Markdown.h @@ -11,6 +11,22 @@ /// ecosystem. The parser takes plain text and returns a tree of typed nodes /// with no knowledge of comments, Doxygen, or Clang-Doc internals. /// +/// This is a simple Markdown parser for use inside Clang-Doc's comment +/// pipeline. You give it a paragraph of text and an arena allocator, and it +/// gives back a list of typed nodes describing the Markdown structure it found. +/// +/// The main entry point is parseMarkdown(). If the text has no Markdown in it, +/// you get back an empty list and can fall back to plain-text output. If it +/// does, you get a tree of MDNode structs where each node has a kind, optional +/// content (like the language tag on a code fence), and optional children. +/// +/// All nodes are allocated in the arena you pass in. You own the arena and are +/// responsible for keeping it alive as long as you use the nodes. +/// +/// The parser handles fenced code blocks, pipe tables, and unordered lists. +/// Anything it does not recognize comes back as a plain text node. It will +/// never crash on bad input. +/// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H >From 5ea6a4ee7b6f41862fbc78b4a348baae32b597d9 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 5 Jun 2026 00:50:59 -0400 Subject: [PATCH 08/12] [clang-doc] Integrate Markdown parser into MDGenerator for fenced code blocks --- clang-tools-extra/clang-doc/MDGenerator.cpp | 87 ++++++++++++++++++++- 1 file changed, 83 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp index df1ca6b868d43..0a01ec5aeba2d 100644 --- a/clang-tools-extra/clang-doc/MDGenerator.cpp +++ b/clang-tools-extra/clang-doc/MDGenerator.cpp @@ -8,14 +8,19 @@ #include "Generators.h" #include "Representation.h" +#include "support/Markdown.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" #include "llvm/Support/raw_ostream.h" #include <string> +#define DEBUG_TYPE "clang-doc-md-generator" + using namespace llvm; namespace clang { @@ -54,12 +59,10 @@ static void writeHeader(const Twine &Text, unsigned int Num, raw_ostream &OS) { static void writeSourceFileRef(const ClangDocContext &CDCtx, const Location &L, raw_ostream &OS) { - if (!CDCtx.RepositoryUrl) { OS << "*Defined at " << L.Filename << "#" << std::to_string(L.StartLineNumber) << "*"; } else { - OS << formatv("*Defined at [#{0}{1}{2}](#{0}{1}{3})*", CDCtx.RepositoryLinePrefix.value_or(""), L.StartLineNumber, L.Filename, *CDCtx.RepositoryUrl); @@ -148,6 +151,40 @@ static void maybeWriteSourceFileRef(llvm::raw_ostream &OS, writeSourceFileRef(CDCtx, *DefLoc, OS); } +/// Writes a single parsed Markdown node to the output stream. +/// +/// Currently handles fenced code blocks and plain text. All other node +/// kinds fall back to emitting their content as plain text so that +/// unrecognised constructs never produce empty output. +static void writeMDNode(const markdown::MDNode &Node, raw_ostream &OS) { + switch (Node.Kind) { + case markdown::NodeKind::NK_FencedCode: + // Emit a Markdown fenced code block, preserving the language tag if one + // was present on the opening fence line. + OS << "\n```"; + if (!Node.Content.empty()) + OS << Node.Content; + OS << "\n"; + for (const auto &Line : Node.Children) + OS << Line.Content << "\n"; + OS << "```\n\n"; + break; + + case markdown::NodeKind::NK_Text: + OS << Node.Content; + break; + + default: + // Unhandled node kinds: emit content and recurse into children so + // nothing is silently dropped. + if (!Node.Content.empty()) + OS << Node.Content; + for (const auto &Child : Node.Children) + writeMDNode(Child, OS); + break; + } +} + static void writeDescription(const CommentInfo &I, raw_ostream &OS) { switch (I.Kind) { case CommentKind::CK_FullComment: @@ -155,11 +192,53 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) { writeDescription(Child, OS); break; - case CommentKind::CK_ParagraphComment: + case CommentKind::CK_ParagraphComment: { + // Clang's comment parser represents each line of a documentation + // paragraph as a separate CK_TextComment child. To parse Markdown + // constructs that span multiple lines (e.g. fenced code blocks), + // we concatenate all text-only children and attempt to parse the + // result as Markdown before falling back to plain-text emission. + // + // If the paragraph contains non-text children (inline commands, + // HTML tags, etc.) we skip Markdown parsing entirely and fall back + // to the original recursive approach so existing behaviour is + // preserved. + bool AllTextChildren = true; + for (const auto &Child : I.Children) + if (Child.Kind != CommentKind::CK_TextComment) { + AllTextChildren = false; + break; + } + + if (AllTextChildren && !I.Children.empty()) { + std::string ParagraphText; + llvm::raw_string_ostream TextOS(ParagraphText); + for (const auto &Child : I.Children) + if (!Child.Text.empty()) + TextOS << Child.Text << "\n"; + + // The allocator is scoped to this paragraph; nodes must not outlive it. + llvm::BumpPtrAllocator Arena; + auto Nodes = markdown::parseMarkdown(ParagraphText, Arena); + + LLVM_DEBUG(llvm::dbgs() + << "[clang-doc] paragraph -> " << Nodes.size() + << " Markdown node(s)\n"); + + if (!Nodes.empty()) { + for (const auto &Node : Nodes) + writeMDNode(Node, OS); + writeNewLine(OS); + break; + } + } + + // Fall back: emit children recursively (original behaviour). for (const auto &Child : I.Children) writeDescription(Child, OS); writeNewLine(OS); break; + } case CommentKind::CK_BlockCommandComment: OS << genEmphasis(I.Name) << " "; @@ -608,4 +687,4 @@ static GeneratorRegistry::Add<MDGenerator> MD(MDGenerator::Format, volatile int MDGeneratorAnchorSource = 0; } // namespace doc -} // namespace clang +} // namespace clang \ No newline at end of file >From c950a96be144926daba5bb1a88104eb7d2bd651e Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 5 Jun 2026 01:08:58 -0400 Subject: [PATCH 09/12] [clang-doc] Add FileCheck test for Markdown fenced code block integration --- .../test/clang-doc/markdown-fenced-code.cpp | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp diff --git a/clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp b/clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp new file mode 100644 index 0000000000000..a55b73b3a569e --- /dev/null +++ b/clang-tools-extra/test/clang-doc/markdown-fenced-code.cpp @@ -0,0 +1,15 @@ +// RUN: rm -rf %t && mkdir -p %t +// RUN: clang-doc --format=md --doxygen --output=%t --executor=standalone %s 2>&1 || true +// RUN: FileCheck %s < %t/GlobalNamespace/index.md + +/// A function with a fenced code block in its documentation. +/// Example usage: +/// ```cpp +/// int x = documented(); +/// ``` +int documented(); + +// CHECK: ### documented +// CHECK: ```cpp +// CHECK: int x = documented(); +// CHECK: ``` \ No newline at end of file >From ddf1f20bb0af4ee6494cd3d5e53be965a5802430 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 5 Jun 2026 01:20:15 -0400 Subject: [PATCH 10/12] [clang-doc] Add LLVM_DEBUG macros to Markdown parser --- .../clang-doc/support/Markdown.cpp | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index bbce53fa17156..49c141bc2baed 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -10,6 +10,10 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "clang-doc-markdown" namespace clang { namespace doc { @@ -38,12 +42,17 @@ allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes, llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, llvm::BumpPtrAllocator &Arena) { - if (ParagraphText.trim().empty()) + if (ParagraphText.trim().empty()) { + LLVM_DEBUG(llvm::dbgs() << "[md] empty input, returning nothing\n"); return {}; + } llvm::SmallVector<llvm::StringRef, 16> Lines; ParagraphText.split(Lines, '\n'); + LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size() + << " line(s)\n"); + llvm::SmallVector<MDNode, 8> Nodes; unsigned I = 0; @@ -59,14 +68,19 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, if (Line.starts_with("```") || Line.starts_with("~~~")) { char Fence = Line[0]; llvm::StringRef Lang = Line.drop_front(3).trim(); + LLVM_DEBUG(llvm::dbgs() << "[md] fenced code block, lang='" + << Lang << "'\n"); llvm::SmallVector<MDNode, 4> CodeLines; ++I; while (I < Lines.size()) { llvm::StringRef CodeLine = Lines[I].trim(); if (CodeLine.size() >= 3 && llvm::all_of(CodeLine.take_front(3), - [Fence](char C) { return C == Fence; })) + [Fence](char C) { return C == Fence; })) { + LLVM_DEBUG(llvm::dbgs() << "[md] closing fence found at line " + << I << "\n"); break; + } CodeLines.push_back(makeText(Lines[I])); ++I; } @@ -75,6 +89,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, Code.Kind = NodeKind::NK_FencedCode; Code.Content = Lang; Code.Children = allocateNodes(CodeLines, Arena); + LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_FencedCode with " + << CodeLines.size() << " line(s)\n"); Nodes.push_back(Code); continue; } @@ -82,6 +98,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, // Pipe table: current line has | and next line is a separator row if (Line.contains('|') && I + 1 < Lines.size() && isSepRow(Lines[I + 1].trim())) { + LLVM_DEBUG(llvm::dbgs() << "[md] pipe table detected at line " + << I << "\n"); llvm::SmallVector<MDNode, 4> Rows; while (I < Lines.size() && Lines[I].trim().contains('|')) { Rows.push_back(makeText(Lines[I].trim())); @@ -91,6 +109,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, Table.Kind = NodeKind::NK_Table; Table.Content = {}; Table.Children = allocateNodes(Rows, Arena); + LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with " + << Rows.size() << " row(s)\n"); Nodes.push_back(Table); continue; } @@ -98,6 +118,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, // Unordered list item if (Line.starts_with("- ") || Line.starts_with("* ") || Line.starts_with("+ ")) { + LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I + << "\n"); llvm::SmallVector<MDNode, 4> Items; while (I < Lines.size()) { llvm::StringRef L = Lines[I].trim(); @@ -115,15 +137,20 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, List.Kind = NodeKind::NK_UnorderedList; List.Content = {}; List.Children = allocateNodes(Items, Arena); + LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_UnorderedList with " + << Items.size() << " item(s)\n"); Nodes.push_back(List); continue; } // Plain text fallback + LLVM_DEBUG(llvm::dbgs() << "[md] plain text: '" << Line << "'\n"); Nodes.push_back(makeText(Line)); ++I; } + LLVM_DEBUG(llvm::dbgs() << "[md] done, " << Nodes.size() + << " top-level node(s)\n"); return allocateNodes(Nodes, Arena); } >From 55b0e43e371a619ad74c7253d8f01236787f88b7 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 5 Jun 2026 01:30:32 -0400 Subject: [PATCH 11/12] [clang-doc] Fix clang-format violations in LLVM_DEBUG macros --- clang-tools-extra/clang-doc/MDGenerator.cpp | 5 ++-- .../clang-doc/support/Markdown.cpp | 28 +++++++++---------- 2 files changed, 15 insertions(+), 18 deletions(-) diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp index 0a01ec5aeba2d..80a43970d4d05 100644 --- a/clang-tools-extra/clang-doc/MDGenerator.cpp +++ b/clang-tools-extra/clang-doc/MDGenerator.cpp @@ -221,9 +221,8 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) { llvm::BumpPtrAllocator Arena; auto Nodes = markdown::parseMarkdown(ParagraphText, Arena); - LLVM_DEBUG(llvm::dbgs() - << "[clang-doc] paragraph -> " << Nodes.size() - << " Markdown node(s)\n"); + LLVM_DEBUG(llvm::dbgs() << "[clang-doc] paragraph -> " << Nodes.size() + << " Markdown node(s)\n"); if (!Nodes.empty()) { for (const auto &Node : Nodes) diff --git a/clang-tools-extra/clang-doc/support/Markdown.cpp b/clang-tools-extra/clang-doc/support/Markdown.cpp index 49c141bc2baed..3c425d89527c1 100644 --- a/clang-tools-extra/clang-doc/support/Markdown.cpp +++ b/clang-tools-extra/clang-doc/support/Markdown.cpp @@ -50,8 +50,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, llvm::SmallVector<llvm::StringRef, 16> Lines; ParagraphText.split(Lines, '\n'); - LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size() - << " line(s)\n"); + LLVM_DEBUG(llvm::dbgs() << "[md] parsing " << Lines.size() << " line(s)\n"); llvm::SmallVector<MDNode, 8> Nodes; unsigned I = 0; @@ -68,8 +67,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, if (Line.starts_with("```") || Line.starts_with("~~~")) { char Fence = Line[0]; llvm::StringRef Lang = Line.drop_front(3).trim(); - LLVM_DEBUG(llvm::dbgs() << "[md] fenced code block, lang='" - << Lang << "'\n"); + LLVM_DEBUG(llvm::dbgs() + << "[md] fenced code block, lang='" << Lang << "'\n"); llvm::SmallVector<MDNode, 4> CodeLines; ++I; while (I < Lines.size()) { @@ -77,8 +76,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, if (CodeLine.size() >= 3 && llvm::all_of(CodeLine.take_front(3), [Fence](char C) { return C == Fence; })) { - LLVM_DEBUG(llvm::dbgs() << "[md] closing fence found at line " - << I << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "[md] closing fence found at line " << I << "\n"); break; } CodeLines.push_back(makeText(Lines[I])); @@ -90,7 +89,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, Code.Content = Lang; Code.Children = allocateNodes(CodeLines, Arena); LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_FencedCode with " - << CodeLines.size() << " line(s)\n"); + << CodeLines.size() << " line(s)\n"); Nodes.push_back(Code); continue; } @@ -98,8 +97,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, // Pipe table: current line has | and next line is a separator row if (Line.contains('|') && I + 1 < Lines.size() && isSepRow(Lines[I + 1].trim())) { - LLVM_DEBUG(llvm::dbgs() << "[md] pipe table detected at line " - << I << "\n"); + LLVM_DEBUG(llvm::dbgs() + << "[md] pipe table detected at line " << I << "\n"); llvm::SmallVector<MDNode, 4> Rows; while (I < Lines.size() && Lines[I].trim().contains('|')) { Rows.push_back(makeText(Lines[I].trim())); @@ -109,8 +108,8 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, Table.Kind = NodeKind::NK_Table; Table.Content = {}; Table.Children = allocateNodes(Rows, Arena); - LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with " - << Rows.size() << " row(s)\n"); + LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_Table with " << Rows.size() + << " row(s)\n"); Nodes.push_back(Table); continue; } @@ -118,8 +117,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, // Unordered list item if (Line.starts_with("- ") || Line.starts_with("* ") || Line.starts_with("+ ")) { - LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I - << "\n"); + LLVM_DEBUG(llvm::dbgs() << "[md] unordered list at line " << I << "\n"); llvm::SmallVector<MDNode, 4> Items; while (I < Lines.size()) { llvm::StringRef L = Lines[I].trim(); @@ -138,7 +136,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, List.Content = {}; List.Children = allocateNodes(Items, Arena); LLVM_DEBUG(llvm::dbgs() << "[md] emitting NK_UnorderedList with " - << Items.size() << " item(s)\n"); + << Items.size() << " item(s)\n"); Nodes.push_back(List); continue; } @@ -150,7 +148,7 @@ llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, } LLVM_DEBUG(llvm::dbgs() << "[md] done, " << Nodes.size() - << " top-level node(s)\n"); + << " top-level node(s)\n"); return allocateNodes(Nodes, Arena); } >From ece5dd18712cc7e974a4e2d6cae1bc5edd5a3382 Mon Sep 17 00:00:00 2001 From: Neil-N4 <[email protected]> Date: Fri, 5 Jun 2026 02:14:26 -0400 Subject: [PATCH 12/12] [clang-doc] Switch to LDBG() for debug output --- clang-tools-extra/clang-doc/MDGenerator.cpp | 8 ++ clang-tools-extra/clang-doc/Markdown.cpp | 133 -------------------- clang-tools-extra/clang-doc/Markdown.h | 59 --------- 3 files changed, 8 insertions(+), 192 deletions(-) delete mode 100644 clang-tools-extra/clang-doc/Markdown.cpp delete mode 100644 clang-tools-extra/clang-doc/Markdown.h diff --git a/clang-tools-extra/clang-doc/MDGenerator.cpp b/clang-tools-extra/clang-doc/MDGenerator.cpp index 80a43970d4d05..a8ba954fa063d 100644 --- a/clang-tools-extra/clang-doc/MDGenerator.cpp +++ b/clang-tools-extra/clang-doc/MDGenerator.cpp @@ -12,7 +12,11 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" +<<<<<<< Updated upstream #include "llvm/Support/Debug.h" +======= +#include "llvm/Support/DebugLog.h" +>>>>>>> Stashed changes #include "llvm/Support/FileSystem.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Path.h" @@ -221,8 +225,12 @@ static void writeDescription(const CommentInfo &I, raw_ostream &OS) { llvm::BumpPtrAllocator Arena; auto Nodes = markdown::parseMarkdown(ParagraphText, Arena); +<<<<<<< Updated upstream LLVM_DEBUG(llvm::dbgs() << "[clang-doc] paragraph -> " << Nodes.size() << " Markdown node(s)\n"); +======= + LDBG() << "paragraph -> " << Nodes.size() << " Markdown node(s)"; +>>>>>>> Stashed changes if (!Nodes.empty()) { for (const auto &Node : Nodes) diff --git a/clang-tools-extra/clang-doc/Markdown.cpp b/clang-tools-extra/clang-doc/Markdown.cpp deleted file mode 100644 index 17ee61369fb6b..0000000000000 --- a/clang-tools-extra/clang-doc/Markdown.cpp +++ /dev/null @@ -1,133 +0,0 @@ -//===-- Markdown.cpp - Markdown Parser --------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "Markdown.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" - -namespace clang { -namespace doc { -namespace markdown { - -static MDNode makeText(llvm::StringRef S) { return {NodeKind::Text, S, {}}; } - -// A line is a table separator if it only contains |, -, :, and spaces, -// and has at least one -. -static bool isSepRow(llvm::StringRef Line) { - return llvm::all_of(Line, - [](char C) { - return C == '|' || C == '-' || C == ':' || C == ' '; - }) && - Line.contains('-'); -} - -static llvm::ArrayRef<MDNode> -allocateNodes(llvm::SmallVectorImpl<MDNode> &Nodes, - llvm::BumpPtrAllocator &Arena) { - if (Nodes.empty()) - return {}; - MDNode *Allocated = Arena.Allocate<MDNode>(Nodes.size()); - std::uninitialized_copy(Nodes.begin(), Nodes.end(), Allocated); - return llvm::ArrayRef<MDNode>(Allocated, Nodes.size()); -} - -llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, - llvm::BumpPtrAllocator &Arena) { - if (ParagraphText.trim().empty()) - return {}; - - llvm::SmallVector<llvm::StringRef, 16> Lines; - ParagraphText.split(Lines, '\n'); - - llvm::SmallVector<MDNode, 8> Nodes; - unsigned I = 0; - - while (I < Lines.size()) { - llvm::StringRef Line = Lines[I].trim(); - - if (Line.empty()) { - ++I; - continue; - } - - // Fenced code block: ``` or ~~~ - if (Line.starts_with("```") || Line.starts_with("~~~")) { - char Fence = Line[0]; - llvm::StringRef Lang = Line.drop_front(3).trim(); - llvm::SmallVector<MDNode, 4> CodeLines; - ++I; - while (I < Lines.size()) { - llvm::StringRef CodeLine = Lines[I].trim(); - if (CodeLine.size() >= 3 && - llvm::all_of(CodeLine.take_front(3), - [Fence](char C) { return C == Fence; })) - break; - CodeLines.push_back(makeText(Lines[I])); - ++I; - } - ++I; // skip closing fence - MDNode Code; - Code.Kind = NodeKind::FencedCode; - Code.Content = Lang; - Code.Children = allocateNodes(CodeLines, Arena); - Nodes.push_back(Code); - continue; - } - - // Pipe table: current line has | and next line is a separator row - if (Line.contains('|') && I + 1 < Lines.size() && - isSepRow(Lines[I + 1].trim())) { - llvm::SmallVector<MDNode, 4> Rows; - while (I < Lines.size() && Lines[I].trim().contains('|')) { - Rows.push_back(makeText(Lines[I].trim())); - ++I; - } - MDNode Table; - Table.Kind = NodeKind::Table; - Table.Content = {}; - Table.Children = allocateNodes(Rows, Arena); - Nodes.push_back(Table); - continue; - } - - // Unordered list item - if (Line.starts_with("- ") || Line.starts_with("* ") || - Line.starts_with("+ ")) { - llvm::SmallVector<MDNode, 4> Items; - while (I < Lines.size()) { - llvm::StringRef L = Lines[I].trim(); - if (!L.starts_with("- ") && !L.starts_with("* ") && - !L.starts_with("+ ")) - break; - MDNode Item; - Item.Kind = NodeKind::ListItem; - Item.Content = L.drop_front(2).trim(); - Item.Children = {}; - Items.push_back(Item); - ++I; - } - MDNode List; - List.Kind = NodeKind::UnorderedList; - List.Content = {}; - List.Children = allocateNodes(Items, Arena); - Nodes.push_back(List); - continue; - } - - // Plain text fallback - Nodes.push_back(makeText(Line)); - ++I; - } - - return allocateNodes(Nodes, Arena); -} - -} // namespace markdown -} // namespace doc -} // namespace clang \ No newline at end of file diff --git a/clang-tools-extra/clang-doc/Markdown.h b/clang-tools-extra/clang-doc/Markdown.h deleted file mode 100644 index bf4815e068b53..0000000000000 --- a/clang-tools-extra/clang-doc/Markdown.h +++ /dev/null @@ -1,59 +0,0 @@ -//===-- Markdown.h - Markdown Parser ----------------------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines a standalone Markdown parsing library for the LLVM -// ecosystem. The parser takes plain text and returns a tree of typed nodes -// with no knowledge of comments, Doxygen, or Clang-Doc internals. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H -#define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/Support/Allocator.h" - -namespace clang { -namespace doc { -namespace markdown { - -enum class NodeKind : uint8_t { - // Block nodes - Paragraph, - FencedCode, - Table, - UnorderedList, - OrderedList, - ListItem, - ThematicBreak, - // Inline nodes - Text, - InlineCode, - Emphasis, - Strong, - SoftBreak, -}; - -struct MDNode { - NodeKind Kind; - llvm::StringRef Content; // lang tag for FencedCode, leaf text for Text - llvm::ArrayRef<MDNode> Children; // arena allocated -}; - -// Parses Markdown from a single comment paragraph's text. -// Returns an empty ArrayRef if no Markdown constructs are found, -// so generators can fall back to plain-text rendering at zero cost. -llvm::ArrayRef<MDNode> parseMarkdown(llvm::StringRef ParagraphText, - llvm::BumpPtrAllocator &Arena); - -} // namespace markdown -} // namespace doc -} // namespace clang - -#endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_MARKDOWN_H \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
