[clang] [Clang][Comments] Add argument parsing for @throw @throws @exception (PR #84726)

via cfe-commits Mon, 03 Jun 2024 00:13:10 -0700

https://github.com/hdoc updated https://github.com/llvm/llvm-project/pull/84726


>From ec3f444913d9162de4494cdb09b336b1b00380fa Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Mon, 11 Mar 2024 01:13:25 -0700
Subject: [PATCH 1/7] Comment parsing: add argument parsing for @throw @throws
 @exception

Doxygen allows for the @throw, @throws, and @exception commands to
have an attached argument indicating the type being thrown. Currently,
Clang's AST parsing doesn't support parsing out this argument from doc
comments. The result is missing compatibility with Doxygen.

We would find it helpful if the AST exposed these thrown types as
BlockCommandComment arguments so that we could generate better
documentation.

This PR implements parsing of arguments for the @throw, @throws, and
@exception commands. Each command can only have one argument, matching
the semantics of Doxygen. We have also added unit tests to validate
the functionality.
---
 clang/include/clang/AST/CommentCommands.td |   6 +-
 clang/include/clang/AST/CommentParser.h    |   3 +
 clang/lib/AST/CommentParser.cpp            | 133 ++++++++++++
 clang/unittests/AST/CommentParser.cpp      | 235 ++++++++++++++++++++-
 4 files changed, 373 insertions(+), 4 deletions(-)

diff --git a/clang/include/clang/AST/CommentCommands.td b/clang/include/clang/AST/CommentCommands.td
index e839031752cdd..06b2fa9b5531c 100644
--- a/clang/include/clang/AST/CommentCommands.td
+++ b/clang/include/clang/AST/CommentCommands.td
@@ -132,9 +132,9 @@ def Tparam : BlockCommand<"tparam"> { let IsTParamCommand = 1; }
 // HeaderDoc command for template parameter documentation.
 def Templatefield : BlockCommand<"templatefield"> { let IsTParamCommand = 1; }
 
-def Throws    : BlockCommand<"throws"> { let IsThrowsCommand = 1; }
-def Throw     : BlockCommand<"throw"> { let IsThrowsCommand = 1; }
-def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; }
+def Throws    : BlockCommand<"throws"> { let IsThrowsCommand = 1; let NumArgs = 1; }
+def Throw     : BlockCommand<"throw"> { let IsThrowsCommand = 1; let NumArgs = 1; }
+def Exception : BlockCommand<"exception"> { let IsThrowsCommand = 1; let NumArgs = 1;}
 
 def Deprecated : BlockCommand<"deprecated"> {
   let IsEmptyParagraphAllowed = 1;
diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h
index e11e818b1af0a..5884a25d00785 100644
--- a/clang/include/clang/AST/CommentParser.h
+++ b/clang/include/clang/AST/CommentParser.h
@@ -100,6 +100,9 @@ class Parser {
   ArrayRef<Comment::Argument>
   parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
 
+  ArrayRef<Comment::Argument>
+  parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
+
   BlockCommandComment *parseBlockCommand();
   InlineCommandComment *parseInlineCommand();
 
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 8adfd85d0160c..c70fa1b05cb24 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -75,6 +75,25 @@ class TextTokenRetokenizer {
     return *Pos.BufferPtr;
   }
 
+  /// Peek \p offset characters past the cursor; '\0' if that is out of range.
+  char peekNext(unsigned offset) const {
+    assert(!isEnd());
+    assert(Pos.BufferPtr != Pos.BufferEnd);
+    // BufferEnd is the past-the-end bound (see the assert), so exclude it.
+    if (Pos.BufferPtr + offset < Pos.BufferEnd)
+      return *(Pos.BufferPtr + offset);
+    return '\0';
+  }
+
+  void peekNextToken(SmallString<32> &WordText) const {
+    // Collect the characters that follow the cursor (offset 1 onwards) until
+    // whitespace or a '\0' result from peekNext().
+    unsigned Ahead = 1;
+    for (char Next = peekNext(Ahead); !isWhitespace(Next) && Next != '\0';
+         Next = peekNext(++Ahead))
+      WordText.push_back(Next);
+  }
+
   void consumeChar() {
     assert(!isEnd());
     assert(Pos.BufferPtr != Pos.BufferEnd);
@@ -89,6 +108,29 @@ class TextTokenRetokenizer {
     }
   }
 
+  /// Consume a balanced '<'...'>' run, appending every character to WordText.
+  /// Returns true once the outermost '>' has been consumed, false if the
+  /// input ends first.
+  bool lexTemplateType(SmallString<32> &WordText) {
+    unsigned Depth = 0;
+    while (!isEnd()) {
+      const char Ch = peek();
+      WordText.push_back(Ch);
+      consumeChar();
+      if (Ch == '<') {
+        ++Depth;
+        continue;
+      }
+      if (Ch != '>')
+        continue;
+      // Closing bracket: done when it matches the outermost '<'.
+      --Depth;
+      if (Depth == 0)
+        return true;
+    }
+    return false;
+  }
+
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -149,6 +191,76 @@ class TextTokenRetokenizer {
     addToken();
   }
 
+  /// Extract a type argument: a word, extended across whitespace for a leading
+  /// "const" and for '*', '&' or "const" suffixes. Restores Pos on failure.
+  bool lexDataType(Token &Tok) {
+    if (isEnd())
+      return false;
+    Position SavedPos = Pos;
+    consumeWhitespace();
+    SmallString<32> NextToken;
+    SmallString<32> WordText;
+    const char *WordBegin = Pos.BufferPtr;
+    SourceLocation Loc = getSourceLocation();
+    StringRef ConstVal = StringRef("const");
+    bool ConstPointer = false;
+
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        if (C == '<') {
+          if (!lexTemplateType(WordText)) {
+            Pos = SavedPos; // Unterminated '<': rewind, keep the text.
+            return false;
+          }
+        } else {
+          WordText.push_back(C);
+          consumeChar();
+        }
+      } else {
+        if (WordText.equals(ConstVal)) { // Leading qualifier: keep going.
+          WordText.push_back(C);
+          consumeChar();
+        } else if (WordText.ends_with(StringRef("*")) ||
+                   WordText.ends_with(StringRef("&"))) {
+          NextToken.clear();
+          peekNextToken(NextToken);
+          if (NextToken.equals(ConstVal)) { // e.g. "int * const"
+            ConstPointer = true;
+            WordText.push_back(C);
+            consumeChar();
+          } else {
+            consumeChar();
+            break;
+          }
+        } else {
+          NextToken.clear();
+          peekNextToken(NextToken);
+          if ((NextToken.ends_with(StringRef("*")) ||
+               NextToken.ends_with(StringRef("&"))) &&
+              !ConstPointer) {
+            WordText.push_back(C);
+            consumeChar();
+          } else {
+            consumeChar();
+            break;
+          }
+        }
+      }
+    }
+
+    const unsigned Length = WordText.size();
+    if (Length == 0) {
+      Pos = SavedPos;
+      return false;
+    }
+    char *TextPtr = Allocator.Allocate<char>(Length + 1);
+    memcpy(TextPtr, WordText.c_str(), Length + 1);
+    StringRef Text = StringRef(TextPtr, Length);
+    formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
+    return true;
+  }
+
   /// Extract a word -- sequence of non-whitespace characters.
   bool lexWord(Token &Tok) {
     if (isEnd())
@@ -295,6 +407,7 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
       Comment::Argument[NumArgs];
   unsigned ParsedArgs = 0;
   Token Arg;
+
   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
     Args[ParsedArgs] = Comment::Argument{
         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
@@ -304,6 +417,23 @@ Parser::parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs) {
   return llvm::ArrayRef(Args, ParsedArgs);
 }
 
+/// Collect up to NumArgs arguments by repeatedly calling lexDataType() on
+/// Retokenizer; stops early at the first failure.
+ArrayRef<Comment::Argument>
+Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
+                              unsigned NumArgs) {
+  auto *Args = new (Allocator.Allocate<Comment::Argument>(NumArgs))
+      Comment::Argument[NumArgs];
+  unsigned Count = 0;
+  for (Token TypeTok; Count < NumArgs && Retokenizer.lexDataType(TypeTok);
+       ++Count) {
+    const SourceRange Range(TypeTok.getLocation(), TypeTok.getEndLocation());
+    Args[Count] = Comment::Argument{Range, TypeTok.getText()};
+  }
+
+  return llvm::ArrayRef(Args, Count);
+}
+
 BlockCommandComment *Parser::parseBlockCommand() {
   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
 
@@ -356,6 +486,9 @@ BlockCommandComment *Parser::parseBlockCommand() {
       parseParamCommandArgs(PC, Retokenizer);
     else if (TPC)
       parseTParamCommandArgs(TPC, Retokenizer);
+    else if (Info->IsThrowsCommand)
+      S.actOnBlockCommandArgs(
+          BC, parseThrowCommandArgs(Retokenizer, Info->NumArgs));
     else
       S.actOnBlockCommandArgs(BC, parseCommandArgs(Retokenizer, Info->NumArgs));
 
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index c3479672ae2a3..e01d654aa1cea 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1427,8 +1427,241 @@ TEST_F(CommentParserTest, Deprecated) {
   }
 }
 
+TEST_F(CommentParserTest, ThrowsCommandHasArg1) {
+  const char *Inputs[] = {
+      "/// @throws int This function throws an integer",
+      ("/// @throws\n"
+       "/// int This function throws an integer"),
+      ("/// @throws \n"
+       "/// int This function throws an integer"),
+      ("/// @throws\n"
+       "/// int\n"
+       "/// This function throws an integer"),
+      ("/// @throws \n"
+       "/// int \n"
+       "/// This function throws an integer"),
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the thrown type as the command's single argument and a paragraph
+    // with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "int");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg2) {
+  const char *Inputs[] = {
+      "/// @throws const int This function throws a const integer",
+      ("/// @throws\n"
+       "/// const int This function throws a const integer"),
+      ("/// @throws \n"
+       "/// const int This function throws a const integer"),
+      ("/// @throws\n"
+       "/// const int\n"
+       "/// This function throws a const integer"),
+      ("/// @throws \n"
+       "/// const int \n"
+       "/// This function throws a const integer"),
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the const-qualified type as the command's single argument and a
+    // paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "const int");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg3) {
+  const char *Inputs[] = {
+      "/// @throws const int * This function throws a pointer to a const "
+      "integer\n",
+      ("/// @throws\n"
+       "/// const int * This function throws a pointer to a const integer"),
+      ("/// @throws \n"
+       "/// const int * This function throws a pointer to a const integer"),
+      ("/// @throws\n"
+       "/// const int *\n"
+       "/// This function throws a pointer to a const integer"),
+      ("/// @throws \n"
+       "/// const int *\n"
+       "/// This function throws a pointer to a const integer"),
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the pointer type, including the separated '*', as the command's
+    // single argument and a paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "const int *");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg4) {
+  const char *Inputs[] = {
+      "/// @throws const int * const This function throws a const pointer to a "
+      "const integer",
+      ("/// @throws\n"
+       "/// const int * const This function throws a const pointer to a const "
+       "integer"),
+      ("/// @throws \n"
+       "/// const int * const This function throws a const pointer to a const "
+       "integer"),
+      ("/// @throws\n"
+       "/// const int * const\n"
+       "/// This function throws a const pointer to a const integer"),
+      ("/// @throws \n"
+       "/// const int * const\n"
+       "/// This function throws a const pointer to a const integer"),
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the type, including the "const" after '*', as the command's
+    // single argument and a paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "const int * const");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg5) {
+  const char *Inputs[] = {
+      "/// @throws int** This function throws a double pointer to an integer",
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the double-pointer type as the command's single argument and a
+    // paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "int**");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg6) {
+  const char *Inputs[] = {
+      "/// @throws const char ** double pointer to a constant char pointer",
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the type, including the separated "**", as the command's single
+    // argument and a paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "const char **");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg7) {
+  const char *Inputs[] = {
+      "/// @throws Error<T> error of type Error<T>",
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the template type as the command's single argument; the
+    // paragraph child count differs from the other tests, see below.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    // Extra children because <T> is parsed as a series of TextComments.
+    ASSERT_TRUE(HasChildCount(Body, 3));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "Error<T>");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg8) {
+  const char *Inputs[] = {
+      "/// @throws Error<Container<T>> nested templates",
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the nested template type as the command's single argument and a
+    // paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "Error<Container<T>>");
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
+  const char *Inputs[] = {
+      "/// @throws Error<Ts...> variadic templates",
+  };
+
+  for (const char *Input : Inputs) {
+    FullComment *FC = parseString(Input);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    // Expect the variadic template type as the command's single argument and
+    // a paragraph with exactly one child.
+    BlockCommandComment *Throws;
+    ParagraphComment *Body;
+    ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, Throws, "throws", Body));
+    ASSERT_TRUE(HasChildCount(Body, 1));
+    ASSERT_TRUE(Throws->getNumArgs() == 1);
+    ASSERT_TRUE(Throws->getArgText(0) == "Error<Ts...>");
+  }
+}
+
 } // unnamed namespace
 
 } // end namespace comments
 } // end namespace clang
-

>From 3463833cb0d9c78d04bcbb9e6a46bd0a0c3e2528 Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Mon, 11 Mar 2024 16:13:33 -0700
Subject: [PATCH 2/7] Fix comment to XML tests

---
 .../Index/comment-to-html-xml-conversion.cpp  | 53 ++++++++-----------
 1 file changed, 21 insertions(+), 32 deletions(-)

diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp
index d9eefb909653c..291aecf44d112 100644
--- a/clang/test/Index/comment-to-html-xml-conversion.cpp
+++ b/clang/test/Index/comment-to-html-xml-conversion.cpp
@@ -1046,82 +1046,71 @@ void comment_to_xml_conversion_todo_4();
 /// Aaa.
 /// \throws Bbb.
 void comment_to_xml_conversion_exceptions_1();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions><Para> Bbb.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \throw Bbb.
 void comment_to_xml_conversion_exceptions_2();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions><Para> Bbb.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \exception Bbb.
 void comment_to_xml_conversion_exceptions_3();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions><Para> Bbb.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[exception]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[exception] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \throws Bbb.
 /// \throws Ccc.
 /// \throws Ddd.
 void comment_to_xml_conversion_exceptions_4();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions><Para> Bbb. </Para><Para> Ccc. </Para><Para> 
Ddd.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.] HasTrailingNewline)
-// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Ccc.] HasTrailingNewline)
-// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Ddd.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Ccc.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Ddd.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 /// Aaa.
 /// \throws Bbb.
 /// \throw Ccc.
 void comment_to_xml_conversion_exceptions_5();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions><Para> Bbb. </Para><Para> 
Ccc.</Para></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
 // CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
 // CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Bbb.] HasTrailingNewline)
-// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
-// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw]
-// CHECK-NEXT:         (CXComment_Paragraph
-// CHECK-NEXT:           (CXComment_Text Text=[ Ccc.]))))]
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc.
+// CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
 #endif
 

>From cc6591c797328447e169029025f6e68918d7f074 Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Tue, 12 Mar 2024 20:15:10 -0700
Subject: [PATCH 3/7] Update XML output routine for Throws commands to fix XML
 validation

XML validation was failing due to the Exception XML being empty, since
the actual exception type was being parsed as an argument instead of
as a ParagraphComment.

This was a result of the change we made to argument parsing.

As a result, I updated the XML output to still output the argument
text in the Para XML, as it was emitted before.
---
 clang/lib/Index/CommentToXML.cpp              | 34 ++++++++++-----
 .../Index/comment-to-html-xml-conversion.cpp  | 42 ++++++++++++++++---
 2 files changed, 59 insertions(+), 17 deletions(-)

diff --git a/clang/lib/Index/CommentToXML.cpp b/clang/lib/Index/CommentToXML.cpp
index 295f3f228ff79..7908311b35d01 100644
--- a/clang/lib/Index/CommentToXML.cpp
+++ b/clang/lib/Index/CommentToXML.cpp
@@ -545,7 +545,8 @@ class CommentASTToXMLConverter :
   void visitParagraphComment(const ParagraphComment *C);
 
   void appendParagraphCommentWithKind(const ParagraphComment *C,
-                                      StringRef Kind);
+                                      StringRef ParagraphKind,
+                                      StringRef PrependBodyText);
 
   void visitBlockCommandComment(const BlockCommandComment *C);
   void visitParamCommandComment(const ParamCommandComment *C);
@@ -679,15 +680,15 @@ CommentASTToXMLConverter::visitHTMLEndTagComment(const HTMLEndTagComment *C) {
   Result << ">&lt;/" << C->getTagName() << "&gt;</rawHTML>";
 }
 
-void
-CommentASTToXMLConverter::visitParagraphComment(const ParagraphComment *C) {
-  appendParagraphCommentWithKind(C, StringRef());
+void CommentASTToXMLConverter::visitParagraphComment(
+    const ParagraphComment *C) {
+  appendParagraphCommentWithKind(C, StringRef(), StringRef());
 }
 
 void CommentASTToXMLConverter::appendParagraphCommentWithKind(
-                                  const ParagraphComment *C,
-                                  StringRef ParagraphKind) {
-  if (C->isWhitespace())
+    const ParagraphComment *C, StringRef ParagraphKind,
+    StringRef PrependBodyText) {
+  if (C->isWhitespace() && PrependBodyText.empty())
     return;
 
   if (ParagraphKind.empty())
@@ -695,8 +696,11 @@ void CommentASTToXMLConverter::appendParagraphCommentWithKind(
   else
     Result << "<Para kind=\"" << ParagraphKind << "\">";
 
-  for (Comment::child_iterator I = C->child_begin(), E = C->child_end();
-       I != E; ++I) {
+  if (!PrependBodyText.empty())
+    Result << PrependBodyText << " ";
+
+  for (Comment::child_iterator I = C->child_begin(), E = C->child_end(); I != E;
+       ++I) {
     visit(*I);
   }
   Result << "</Para>";
@@ -705,8 +709,15 @@ void CommentASTToXMLConverter::appendParagraphCommentWithKind(
 void CommentASTToXMLConverter::visitBlockCommandComment(
     const BlockCommandComment *C) {
   StringRef ParagraphKind;
+  StringRef ExceptionType;
 
-  switch (C->getCommandID()) {
+  const unsigned CommandID = C->getCommandID();
+  const CommandInfo *Info = Traits.getCommandInfo(CommandID);
+  if (Info->IsThrowsCommand && C->getNumArgs() > 0) {
+    ExceptionType = C->getArgText(0);
+  }
+
+  switch (CommandID) {
   case CommandTraits::KCI_attention:
   case CommandTraits::KCI_author:
   case CommandTraits::KCI_authors:
@@ -731,7 +742,8 @@ void CommentASTToXMLConverter::visitBlockCommandComment(
     break;
   }
 
-  appendParagraphCommentWithKind(C->getParagraph(), ParagraphKind);
+  appendParagraphCommentWithKind(C->getParagraph(), ParagraphKind,
+                                 ExceptionType);
 }
 
 void CommentASTToXMLConverter::visitParamCommandComment(
diff --git a/clang/test/Index/comment-to-html-xml-conversion.cpp b/clang/test/Index/comment-to-html-xml-conversion.cpp
index 291aecf44d112..e0a7cff5a9a3d 100644
--- a/clang/test/Index/comment-to-html-xml-conversion.cpp
+++ b/clang/test/Index/comment-to-html-xml-conversion.cpp
@@ -1046,7 +1046,7 @@ void comment_to_xml_conversion_todo_4();
 /// Aaa.
 /// \throws Bbb.
 void comment_to_xml_conversion_exceptions_1();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: 
FunctionDecl=comment_to_xml_conversion_exceptions_1:{{.*}} 
FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" 
line="[[@LINE-1]]" 
column="6"><Name>comment_to_xml_conversion_exceptions_1</Name><USR>c:@F@comment_to_xml_conversion_exceptions_1#</USR><Declaration>void
 comment_to_xml_conversion_exceptions_1()</Declaration><Abstract><Para> Aaa. 
</Para></Abstract><Exceptions><Para>Bbb. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1058,7 +1058,7 @@ void comment_to_xml_conversion_exceptions_1();
 /// Aaa.
 /// \throw Bbb.
 void comment_to_xml_conversion_exceptions_2();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_2:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_2</Name><USR>c:@F@comment_to_xml_conversion_exceptions_2#</USR><Declaration>void comment_to_xml_conversion_exceptions_2()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1070,7 +1070,7 @@ void comment_to_xml_conversion_exceptions_2();
 /// Aaa.
 /// \exception Bbb.
 void comment_to_xml_conversion_exceptions_3();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_3:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_3</Name><USR>c:@F@comment_to_xml_conversion_exceptions_3#</USR><Declaration>void comment_to_xml_conversion_exceptions_3()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1084,7 +1084,7 @@ void comment_to_xml_conversion_exceptions_3();
 /// \throws Ccc.
 /// \throws Ddd.
 void comment_to_xml_conversion_exceptions_4();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_4:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_4</Name><USR>c:@F@comment_to_xml_conversion_exceptions_4#</USR><Declaration>void comment_to_xml_conversion_exceptions_4()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para><Para>Ccc. </Para><Para>Ddd. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1101,7 +1101,7 @@ void comment_to_xml_conversion_exceptions_4();
 /// \throws Bbb.
 /// \throw Ccc.
 void comment_to_xml_conversion_exceptions_5();
-// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions></Exceptions></Function>]
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_5:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_5</Name><USR>c:@F@comment_to_xml_conversion_exceptions_5#</USR><Declaration>void comment_to_xml_conversion_exceptions_5()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb. </Para><Para>Ccc. </Para></Exceptions></Function>]
 // CHECK-NEXT:  CommentAST=[
 // CHECK-NEXT:    (CXComment_FullComment
 // CHECK-NEXT:       (CXComment_Paragraph
@@ -1112,5 +1112,35 @@ void comment_to_xml_conversion_exceptions_5();
 // CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc.
 // CHECK-NEXT:         (CXComment_Paragraph IsWhitespace)))]
 
-#endif
+/// Aaa.
+/// \throws Bbb subsequent arg text
+void comment_to_xml_conversion_exceptions_6();
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_6:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_6</Name><USR>c:@F@comment_to_xml_conversion_exceptions_6#</USR><Declaration>void comment_to_xml_conversion_exceptions_6()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb subsequent arg text</Para></Exceptions></Function>]
+// CHECK-NEXT:  CommentAST=[
+// CHECK-NEXT:    (CXComment_FullComment
+// CHECK-NEXT:       (CXComment_Paragraph
+// CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
+// CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb
+// CHECK-NEXT:         (CXComment_Paragraph
+// CHECK-NEXT:           (CXComment_Text Text=[subsequent arg text]))))]
 
+/// Aaa.
+/// \throws Bbb subsequent arg text
+/// \throw Ccc subsequent arg text
+void comment_to_xml_conversion_exceptions_7();
+// CHECK: comment-to-html-xml-conversion.cpp:[[@LINE-1]]:6: FunctionDecl=comment_to_xml_conversion_exceptions_7:{{.*}} FullCommentAsXML=[<Function file="{{[^"]+}}comment-to-html-xml-conversion.cpp" line="[[@LINE-1]]" column="6"><Name>comment_to_xml_conversion_exceptions_7</Name><USR>c:@F@comment_to_xml_conversion_exceptions_7#</USR><Declaration>void comment_to_xml_conversion_exceptions_7()</Declaration><Abstract><Para> Aaa. </Para></Abstract><Exceptions><Para>Bbb subsequent arg text </Para><Para>Ccc subsequent arg text</Para></Exceptions></Function>]
+// CHECK-NEXT:  CommentAST=[
+// CHECK-NEXT:    (CXComment_FullComment
+// CHECK-NEXT:       (CXComment_Paragraph
+// CHECK-NEXT:         (CXComment_Text Text=[ Aaa.] HasTrailingNewline)
+// CHECK-NEXT:         (CXComment_Text Text=[ ] IsWhitespace))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throws] Arg[0]=Bbb
+// CHECK-NEXT:         (CXComment_Paragraph
+// CHECK-NEXT:           (CXComment_Text Text=[subsequent arg text] HasTrailingNewline)
+// CHECK-NEXT:           (CXComment_Text Text=[ ] IsWhitespace)))
+// CHECK-NEXT:       (CXComment_BlockCommand CommandName=[throw] Arg[0]=Ccc
+// CHECK-NEXT:         (CXComment_Paragraph
+// CHECK-NEXT:           (CXComment_Text Text=[subsequent arg text]))))]
+
+#endif

>From 3a4f7bde2766157564475a5f12bc8db8d11a8778 Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Fri, 15 Mar 2024 11:42:50 -0700
Subject: [PATCH 4/7] Support more qualifiers, such as volatile

---
 clang/lib/AST/CommentParser.cpp       | 99 ++++++++++++++++++++-------
 clang/unittests/AST/CommentParser.cpp | 86 +++++++++++++++++++++++
 2 files changed, 162 insertions(+), 23 deletions(-)

diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index c70fa1b05cb24..a2aa3cfc6d5b5 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -131,6 +131,48 @@ class TextTokenRetokenizer {
     return false;
   }
 
+  bool isDataTypeQualifier(SmallString<32> &WordText) {
+    if (WordText.ends_with(StringRef("const")))
+      return true;
+    if (WordText.ends_with(StringRef("volatile")))
+      return true;
+    if (WordText.ends_with(StringRef("unsigned")))
+      return true;
+    if (WordText.ends_with(StringRef("signed")))
+      return true;
+    if (WordText.ends_with(StringRef("long")))
+      return true;
+    if (WordText.ends_with(StringRef("short")))
+      return true;
+    if (WordText.ends_with(StringRef("restrict")))
+      return true;
+    if (WordText.ends_with(StringRef("auto")))
+      return true;
+    if (WordText.ends_with(StringRef("register")))
+      return true;
+    if (WordText.ends_with(StringRef("static")))
+      return true;
+    if (WordText.ends_with(StringRef("extern")))
+      return true;
+    if (WordText.ends_with(StringRef("struct")))
+      return true;
+    if (WordText.ends_with(StringRef("typedef")))
+      return true;
+    if (WordText.ends_with(StringRef("union")))
+      return true;
+    if (WordText.ends_with(StringRef("void")))
+      return true;
+    return false;
+  }
+
+  bool isScopeResolutionOperator(SmallString<32> &WordText) {
+    return WordText.ends_with(StringRef("::"));
+  }
+
+  bool continueParsing(SmallString<32> &WordText) {
+    return isDataTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
+  }
+
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -192,7 +234,7 @@ class TextTokenRetokenizer {
   }
 
   /// Extract a type argument
-  bool lexDataType(Token &Tok) {
+  bool lexType(Token &Tok) {
     if (isEnd())
       return false;
     Position SavedPos = Pos;
@@ -202,6 +244,8 @@ class TextTokenRetokenizer {
     const char *WordBegin = Pos.BufferPtr;
     SourceLocation Loc = getSourceLocation();
     StringRef ConstVal = StringRef("const");
+    StringRef PointerVal = StringRef("*");
+    StringRef ReferenceVal = StringRef("&");
     bool ConstPointer = false;
 
     while (!isEnd()) {
@@ -215,32 +259,41 @@ class TextTokenRetokenizer {
           consumeChar();
         }
       } else {
-        if (WordText.equals(ConstVal)) {
-          WordText.push_back(C);
+        if (ConstPointer) {
           consumeChar();
-        } else if (WordText.ends_with(StringRef("*")) ||
-                   WordText.ends_with(StringRef("&"))) {
-          NextToken.clear();
-          peekNextToken(NextToken);
-          if (NextToken.equals(ConstVal)) {
-            ConstPointer = true;
-            WordText.push_back(C);
-            consumeChar();
-          } else {
-            consumeChar();
-            break;
-          }
+          break;
         } else {
-          NextToken.clear();
-          peekNextToken(NextToken);
-          if ((NextToken.ends_with(StringRef("*")) ||
-               NextToken.ends_with(StringRef("&"))) &&
-              !ConstPointer) {
+          if (continueParsing(WordText)) {
             WordText.push_back(C);
             consumeChar();
           } else {
-            consumeChar();
-            break;
+            NextToken.clear();
+            peekNextToken(NextToken);
+            if (WordText.ends_with(PointerVal) ||
+                WordText.ends_with(ReferenceVal)) {
+              if (NextToken.equals(ConstVal)) {
+                ConstPointer = true;
+                WordText.push_back(C);
+                consumeChar();
+              } else {
+                consumeChar();
+                break;
+              }
+            } else {
+              if ((NextToken.ends_with(PointerVal) ||
+                   NextToken.ends_with(ReferenceVal))) {
+                WordText.push_back(C);
+                consumeChar();
+              } else {
+                if (continueParsing(NextToken)) {
+                  WordText.push_back(C);
+                  consumeChar();
+                } else {
+                  consumeChar();
+                  break;
+                }
+              }
+            }
           }
         }
       }
@@ -425,7 +478,7 @@ Parser::parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer,
   unsigned ParsedArgs = 0;
   Token Arg;
 
-  while (ParsedArgs < NumArgs && Retokenizer.lexDataType(Arg)) {
+  while (ParsedArgs < NumArgs && Retokenizer.lexType(Arg)) {
     Args[ParsedArgs] = Comment::Argument{
         SourceRange(Arg.getLocation(), Arg.getEndLocation()), Arg.getText()};
     ParsedArgs++;
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index e01d654aa1cea..e2e77dd95a9a5 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1661,6 +1661,92 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
   }
 }
 
+TEST_F(CommentParserTest, ThrowsCommandHasArg10) {
+  const char *Sources[] = {
+      "/// @throws const std::map<int, std::string> * pointer to a const map",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "const std::map<int, std::string> *");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg11) {
+  const char *Sources[] = {
+      "/// @throws const std :: map<int, std :: string> * pointer to a "
+      "const map with spaces",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) ==
+                  "const std :: map<int, std :: string> *");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg12) {
+  const char *Sources[] = {
+      "/// @throws volatile int a volatile integer",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "volatile int");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg13) {
+  const char *Sources[] = {
+      "/// @throws volatile double * volatile pointer to a double",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "volatile double *");
+    }
+  }
+}
+
 } // unnamed namespace
 
 } // end namespace comments

>From fee8def56a6b32a96551d1d0738ab695bd90b2f9 Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Sun, 24 Mar 2024 00:39:56 -0700
Subject: [PATCH 5/7] Refactor integer argument parsing and add corresponding
 test cases

---
 clang/lib/AST/CommentParser.cpp       | 127 ++++++++++++++++++--------
 clang/unittests/AST/CommentParser.cpp |  42 +++++++++
 2 files changed, 130 insertions(+), 39 deletions(-)

diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index a2aa3cfc6d5b5..63baebe59cdfd 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -108,69 +108,112 @@ class TextTokenRetokenizer {
     }
   }
 
+  bool continueInt(SmallString<32> &NextToken) {
+    return NextToken.ends_with(StringRef("char")) ||
+           NextToken.ends_with(StringRef("int")) ||
+           NextToken.ends_with(StringRef("char*")) ||
+           NextToken.ends_with(StringRef("int*")) ||
+           NextToken.ends_with(StringRef("char&")) ||
+           NextToken.ends_with(StringRef("int&"));
+  }
+
+  bool lexInt(SmallString<32> &WordText, SmallString<32> &NextToken) {
+    unsigned LongCounter = (WordText.ends_with(StringRef("long"))) ? 1 : 0;
+    bool complete = false;
+
+    while (!isEnd()) {
+      const char C = peek();
+      if (!isWhitespace(C)) {
+        WordText.push_back(C);
+        consumeChar();
+      } else {
+
+        NextToken.clear();
+        peekNextToken(NextToken);
+
+        if (WordText.ends_with(StringRef("long"))) {
+          LongCounter++;
+          if (continueInt(NextToken)) {
+            WordText.push_back(C);
+            consumeChar();
+            complete = true;
+            continue;
+          } else {
+            if (LongCounter == 2) {
+              return true;
+            }
+          }
+        } else {
+
+          if (complete || continueInt(WordText)) {
+            return true;
+          }
+        }
+
+        if (NextToken.ends_with(StringRef("long"))) {
+          WordText.push_back(C);
+          consumeChar();
+        } else {
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
   /// Extract a template type
-  bool lexTemplateType(SmallString<32> &WordText) {
+  bool lexTemplate(SmallString<32> &WordText) {
     unsigned IncrementCounter = 0;
     while (!isEnd()) {
       const char C = peek();
       WordText.push_back(C);
       consumeChar();
       switch (C) {
-      default:
-        break;
       case '<': {
         IncrementCounter++;
-      } break;
+        break;
+      }
       case '>': {
         IncrementCounter--;
         if (!IncrementCounter)
           return true;
-      } break;
+        break;
+      }
+      default:
+        break;
       }
     }
     return false;
   }
 
-  bool isDataTypeQualifier(SmallString<32> &WordText) {
-    if (WordText.ends_with(StringRef("const")))
-      return true;
-    if (WordText.ends_with(StringRef("volatile")))
-      return true;
-    if (WordText.ends_with(StringRef("unsigned")))
-      return true;
-    if (WordText.ends_with(StringRef("signed")))
-      return true;
-    if (WordText.ends_with(StringRef("long")))
-      return true;
-    if (WordText.ends_with(StringRef("short")))
-      return true;
-    if (WordText.ends_with(StringRef("restrict")))
-      return true;
-    if (WordText.ends_with(StringRef("auto")))
-      return true;
-    if (WordText.ends_with(StringRef("register")))
-      return true;
-    if (WordText.ends_with(StringRef("static")))
-      return true;
-    if (WordText.ends_with(StringRef("extern")))
-      return true;
-    if (WordText.ends_with(StringRef("struct")))
-      return true;
-    if (WordText.ends_with(StringRef("typedef")))
-      return true;
-    if (WordText.ends_with(StringRef("union")))
-      return true;
-    if (WordText.ends_with(StringRef("void")))
-      return true;
-    return false;
+  bool isTypeQualifier(SmallString<32> &WordText) {
+    return WordText.ends_with(StringRef("const")) ||
+           WordText.ends_with(StringRef("volatile")) ||
+           WordText.ends_with(StringRef("short")) ||
+           WordText.ends_with(StringRef("restrict")) ||
+           WordText.ends_with(StringRef("auto")) ||
+           WordText.ends_with(StringRef("register")) ||
+           WordText.ends_with(StringRef("static")) ||
+           WordText.ends_with(StringRef("extern")) ||
+           WordText.ends_with(StringRef("struct")) ||
+           WordText.ends_with(StringRef("typedef")) ||
+           WordText.ends_with(StringRef("union")) ||
+           WordText.ends_with(StringRef("void"));
   }
 
   bool isScopeResolutionOperator(SmallString<32> &WordText) {
     return WordText.ends_with(StringRef("::"));
   }
 
+  bool isInt(SmallString<32> &WordText) {
+    return WordText.ends_with(StringRef("unsigned")) ||
+           WordText.ends_with(StringRef("long")) ||
+           WordText.ends_with(StringRef("signed"));
+  }
+
   bool continueParsing(SmallString<32> &WordText) {
-    return isDataTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
+    return isTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
   }
 
   /// Add a token.
@@ -252,7 +295,7 @@ class TextTokenRetokenizer {
       const char C = peek();
       if (!isWhitespace(C)) {
         if (C == '<') {
-          if (!lexTemplateType(WordText))
+          if (!lexTemplate(WordText))
             return false;
         } else {
           WordText.push_back(C);
@@ -263,6 +306,12 @@ class TextTokenRetokenizer {
           consumeChar();
           break;
         } else {
+          if (isInt(WordText)) {
+            WordText.push_back(C);
+            consumeChar();
+            if (!lexInt(WordText, NextToken))
+              return false;
+          }
           if (continueParsing(WordText)) {
             WordText.push_back(C);
             consumeChar();
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index e2e77dd95a9a5..385a98e2a036b 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1747,6 +1747,48 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg13) {
   }
 }
 
+TEST_F(CommentParserTest, ThrowsCommandHasArg14) {
+  const char *Sources[] = {
+      "/// @throws unsigned long at least a 32-bit integer",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "unsigned long");
+    }
+  }
+}
+
+TEST_F(CommentParserTest, ThrowsCommandHasArg15) {
+  const char *Sources[] = {
+      "/// @throws unsigned long long at least a 64-bit integer",
+  };
+
+  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
+    FullComment *FC = parseString(Sources[i]);
+    ASSERT_TRUE(HasChildCount(FC, 2));
+
+    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
+    {
+      BlockCommandComment *BCC;
+      ParagraphComment *PC;
+      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
+      ASSERT_TRUE(HasChildCount(PC, 1));
+      ASSERT_TRUE(BCC->getNumArgs() == 1);
+      ASSERT_TRUE(BCC->getArgText(0) == "unsigned long long");
+    }
+  }
+}
+
 } // unnamed namespace
 
 } // end namespace comments

>From 64f93cc6aac2ccd826ab48a29364c00f6f62da0a Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Tue, 2 Apr 2024 21:37:53 -0700
Subject: [PATCH 6/7] Address review feedback

- Refactored some functionality with the help of clang-tidy
- Added comments to the complicated lexing functions
- Renamed variables to follow LLVM/Clang conventions
---
 clang/lib/AST/CommentParser.cpp | 118 +++++++++++++++++++-------------
 1 file changed, 71 insertions(+), 47 deletions(-)

diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index 63baebe59cdfd..f6d2ff55e3beb 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -78,7 +78,7 @@ class TextTokenRetokenizer {
   char peekNext(unsigned offset) const {
     assert(!isEnd());
     assert(Pos.BufferPtr != Pos.BufferEnd);
-    if (Pos.BufferPtr + offset <= Pos.BufferEnd) {
+    if (Pos.BufferPtr + offset < Pos.BufferEnd) {
       return *(Pos.BufferPtr + offset);
     } else {
       return '\0';
@@ -108,7 +108,7 @@ class TextTokenRetokenizer {
     }
   }
 
-  bool continueInt(SmallString<32> &NextToken) {
+  bool shouldContinueLexingIntegralType(SmallString<32> &NextToken) {
     return NextToken.ends_with(StringRef("char")) ||
            NextToken.ends_with(StringRef("int")) ||
            NextToken.ends_with(StringRef("char*")) ||
@@ -117,9 +117,10 @@ class TextTokenRetokenizer {
            NextToken.ends_with(StringRef("int&"));
   }
 
-  bool lexInt(SmallString<32> &WordText, SmallString<32> &NextToken) {
+  /// Lex an integral type, such as unsigned long long, etc.
+  bool lexIntegral(SmallString<32> &WordText, SmallString<32> &NextToken) {
     unsigned LongCounter = (WordText.ends_with(StringRef("long"))) ? 1 : 0;
-    bool complete = false;
+    bool IsLexingComplete = false;
 
     while (!isEnd()) {
       const char C = peek();
@@ -127,29 +128,33 @@ class TextTokenRetokenizer {
         WordText.push_back(C);
         consumeChar();
       } else {
-
         NextToken.clear();
         peekNextToken(NextToken);
 
         if (WordText.ends_with(StringRef("long"))) {
           LongCounter++;
-          if (continueInt(NextToken)) {
+          // Use the next token to determine if we should continue parsing
+          if (shouldContinueLexingIntegralType(NextToken)) {
             WordText.push_back(C);
             consumeChar();
-            complete = true;
+            IsLexingComplete = true;
             continue;
-          } else {
-            if (LongCounter == 2) {
-              return true;
-            }
           }
-        } else {
+          // Maximum number of consecutive "long" is 2, so we can return if
+          // we've hit that.
+          if (LongCounter == 2) {
+            return true;
+          }
+        }
 
-          if (complete || continueInt(WordText)) {
+        // If current word doesn't end with long, check if we should exit early
+        else {
+          if (IsLexingComplete || shouldContinueLexingIntegralType(WordText)) {
             return true;
           }
         }
 
+        // If next token ends with long then we consume it and continue parsing
         if (NextToken.ends_with(StringRef("long"))) {
           WordText.push_back(C);
           consumeChar();
@@ -206,7 +211,7 @@ class TextTokenRetokenizer {
     return WordText.ends_with(StringRef("::"));
   }
 
-  bool isInt(SmallString<32> &WordText) {
+  bool isIntegral(SmallString<32> &WordText) {
     return WordText.ends_with(StringRef("unsigned")) ||
            WordText.ends_with(StringRef("long")) ||
            WordText.ends_with(StringRef("signed"));
@@ -280,7 +285,12 @@ class TextTokenRetokenizer {
   bool lexType(Token &Tok) {
     if (isEnd())
       return false;
+
+    // Save current position in case we need to rollback because the type is
+    // empty.
     Position SavedPos = Pos;
+
+    // Consume any leading whitespace.
     consumeWhitespace();
     SmallString<32> NextToken;
     SmallString<32> WordText;
@@ -289,10 +299,12 @@ class TextTokenRetokenizer {
     StringRef ConstVal = StringRef("const");
     StringRef PointerVal = StringRef("*");
     StringRef ReferenceVal = StringRef("&");
-    bool ConstPointer = false;
+    bool IsTypeConstPointerOrRef = false;
 
     while (!isEnd()) {
       const char C = peek();
+      // For non-whitespace characters we check if it's a template or otherwise
+      // continue reading the text into a word.
       if (!isWhitespace(C)) {
         if (C == '<') {
           if (!lexTemplate(WordText))
@@ -301,47 +313,59 @@ class TextTokenRetokenizer {
           WordText.push_back(C);
           consumeChar();
         }
-      } else {
-        if (ConstPointer) {
+      }
+      // For whitespace, we start inspecting the constructed word
+      else {
+        // If we encounter a pointer/reference, we can stop parsing since we're
+        // only parsing expressions.
+        if (IsTypeConstPointerOrRef) {
           consumeChar();
           break;
-        } else {
-          if (isInt(WordText)) {
-            WordText.push_back(C);
-            consumeChar();
-            if (!lexInt(WordText, NextToken))
-              return false;
-          }
-          if (continueParsing(WordText)) {
-            WordText.push_back(C);
-            consumeChar();
+        }
+        // Parse out integral types
+        if (isIntegral(WordText)) {
+          WordText.push_back(C);
+          consumeChar();
+          if (!lexIntegral(WordText, NextToken))
+            return false;
+        }
+        // Certain types, like qualified names or types with CVR to name a few,
+        // may have whitespace inside of the typename, so we need to check and
+        // continue parsing if that's the case
+        if (continueParsing(WordText)) {
+          WordText.push_back(C);
+          consumeChar();
+        }
+        // Handles cases without qualified names or type qualifiers
+        else {
+          NextToken.clear();
+          peekNextToken(NextToken);
+          // Check for pointer/ref vals, and mark the type as a pointer/ref for
+          // the rest of the lex
+          if (WordText.ends_with(PointerVal) ||
+              WordText.ends_with(ReferenceVal)) {
+            if (NextToken.equals(ConstVal)) {
+              IsTypeConstPointerOrRef = true;
+              WordText.push_back(C);
+              consumeChar();
+            } else {
+              consumeChar();
+              break;
+            }
           } else {
-            NextToken.clear();
-            peekNextToken(NextToken);
-            if (WordText.ends_with(PointerVal) ||
-                WordText.ends_with(ReferenceVal)) {
-              if (NextToken.equals(ConstVal)) {
-                ConstPointer = true;
+            // Check if the next token is a pointer/ref
+            if ((NextToken.ends_with(PointerVal) ||
+                 NextToken.ends_with(ReferenceVal))) {
+              WordText.push_back(C);
+              consumeChar();
+            } else {
+              if (continueParsing(NextToken)) {
                 WordText.push_back(C);
                 consumeChar();
               } else {
                 consumeChar();
                 break;
               }
-            } else {
-              if ((NextToken.ends_with(PointerVal) ||
-                   NextToken.ends_with(ReferenceVal))) {
-                WordText.push_back(C);
-                consumeChar();
-              } else {
-                if (continueParsing(NextToken)) {
-                  WordText.push_back(C);
-                  consumeChar();
-                } else {
-                  consumeChar();
-                  break;
-                }
-              }
             }
           }
         }

>From 87aeca8df94a745090933cac282a7ddde75ffb39 Mon Sep 17 00:00:00 2001
From: hdoc <git...@hdoc.io>
Date: Wed, 29 May 2024 15:06:41 +0200
Subject: [PATCH 7/7] Parse only the space-delimited word after @throw

This is in response to a request by upstream to simplify the parser
previously developed.
---
 clang/include/clang/AST/CommentParser.h |   2 +
 clang/lib/AST/CommentParser.cpp         | 174 +---------------
 clang/unittests/AST/CommentParser.cpp   | 252 +-----------------------
 3 files changed, 9 insertions(+), 419 deletions(-)

diff --git a/clang/include/clang/AST/CommentParser.h b/clang/include/clang/AST/CommentParser.h
index 5884a25d00785..636bd40aeaa7f 100644
--- a/clang/include/clang/AST/CommentParser.h
+++ b/clang/include/clang/AST/CommentParser.h
@@ -100,6 +100,8 @@ class Parser {
   ArrayRef<Comment::Argument>
   parseCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
 
+  /// Parse arguments for the \\throws command. Supported arguments take the
+  /// form of a class or a template.
   ArrayRef<Comment::Argument>
   parseThrowCommandArgs(TextTokenRetokenizer &Retokenizer, unsigned NumArgs);
 
diff --git a/clang/lib/AST/CommentParser.cpp b/clang/lib/AST/CommentParser.cpp
index f6d2ff55e3beb..098866b358135 100644
--- a/clang/lib/AST/CommentParser.cpp
+++ b/clang/lib/AST/CommentParser.cpp
@@ -75,25 +75,6 @@ class TextTokenRetokenizer {
     return *Pos.BufferPtr;
   }
 
-  char peekNext(unsigned offset) const {
-    assert(!isEnd());
-    assert(Pos.BufferPtr != Pos.BufferEnd);
-    if (Pos.BufferPtr + offset < Pos.BufferEnd) {
-      return *(Pos.BufferPtr + offset);
-    } else {
-      return '\0';
-    }
-  }
-
-  void peekNextToken(SmallString<32> &WordText) const {
-    unsigned offset = 1;
-    char C = peekNext(offset++);
-    while (!isWhitespace(C) && C != '\0') {
-      WordText.push_back(C);
-      C = peekNext(offset++);
-    }
-  }
-
   void consumeChar() {
     assert(!isEnd());
     assert(Pos.BufferPtr != Pos.BufferEnd);
@@ -108,65 +89,6 @@ class TextTokenRetokenizer {
     }
   }
 
-  bool shouldContinueLexingIntegralType(SmallString<32> &NextToken) {
-    return NextToken.ends_with(StringRef("char")) ||
-           NextToken.ends_with(StringRef("int")) ||
-           NextToken.ends_with(StringRef("char*")) ||
-           NextToken.ends_with(StringRef("int*")) ||
-           NextToken.ends_with(StringRef("char&")) ||
-           NextToken.ends_with(StringRef("int&"));
-  }
-
-  /// Lex an integral type, such as unsigned long long, etc.
-  bool lexIntegral(SmallString<32> &WordText, SmallString<32> &NextToken) {
-    unsigned LongCounter = (WordText.ends_with(StringRef("long"))) ? 1 : 0;
-    bool IsLexingComplete = false;
-
-    while (!isEnd()) {
-      const char C = peek();
-      if (!isWhitespace(C)) {
-        WordText.push_back(C);
-        consumeChar();
-      } else {
-        NextToken.clear();
-        peekNextToken(NextToken);
-
-        if (WordText.ends_with(StringRef("long"))) {
-          LongCounter++;
-          // Use the next token to determine if we should continue parsing
-          if (shouldContinueLexingIntegralType(NextToken)) {
-            WordText.push_back(C);
-            consumeChar();
-            IsLexingComplete = true;
-            continue;
-          }
-          // Maximum number of consecutive "long" is 2, so we can return if
-          // we've hit that.
-          if (LongCounter == 2) {
-            return true;
-          }
-        }
-
-        // If current word doesn't end with long, check if we should exit early
-        else {
-          if (IsLexingComplete || shouldContinueLexingIntegralType(WordText)) {
-            return true;
-          }
-        }
-
-        // If next token ends with long then we consume it and continue parsing
-        if (NextToken.ends_with(StringRef("long"))) {
-          WordText.push_back(C);
-          consumeChar();
-        } else {
-          return true;
-        }
-      }
-    }
-
-    return false;
-  }
-
   /// Extract a template type
   bool lexTemplate(SmallString<32> &WordText) {
     unsigned IncrementCounter = 0;
@@ -192,35 +114,6 @@ class TextTokenRetokenizer {
     return false;
   }
 
-  bool isTypeQualifier(SmallString<32> &WordText) {
-    return WordText.ends_with(StringRef("const")) ||
-           WordText.ends_with(StringRef("volatile")) ||
-           WordText.ends_with(StringRef("short")) ||
-           WordText.ends_with(StringRef("restrict")) ||
-           WordText.ends_with(StringRef("auto")) ||
-           WordText.ends_with(StringRef("register")) ||
-           WordText.ends_with(StringRef("static")) ||
-           WordText.ends_with(StringRef("extern")) ||
-           WordText.ends_with(StringRef("struct")) ||
-           WordText.ends_with(StringRef("typedef")) ||
-           WordText.ends_with(StringRef("union")) ||
-           WordText.ends_with(StringRef("void"));
-  }
-
-  bool isScopeResolutionOperator(SmallString<32> &WordText) {
-    return WordText.ends_with(StringRef("::"));
-  }
-
-  bool isIntegral(SmallString<32> &WordText) {
-    return WordText.ends_with(StringRef("unsigned")) ||
-           WordText.ends_with(StringRef("long")) ||
-           WordText.ends_with(StringRef("signed"));
-  }
-
-  bool continueParsing(SmallString<32> &WordText) {
-    return isTypeQualifier(WordText) || isScopeResolutionOperator(WordText);
-  }
-
   /// Add a token.
   /// Returns true on success, false if there are no interesting tokens to
   /// fetch from lexer.
@@ -292,14 +185,9 @@ class TextTokenRetokenizer {
 
     // Consume any leading whitespace.
     consumeWhitespace();
-    SmallString<32> NextToken;
     SmallString<32> WordText;
     const char *WordBegin = Pos.BufferPtr;
     SourceLocation Loc = getSourceLocation();
-    StringRef ConstVal = StringRef("const");
-    StringRef PointerVal = StringRef("*");
-    StringRef ReferenceVal = StringRef("&");
-    bool IsTypeConstPointerOrRef = false;
 
     while (!isEnd()) {
       const char C = peek();
@@ -313,62 +201,9 @@ class TextTokenRetokenizer {
           WordText.push_back(C);
           consumeChar();
         }
-      }
-      // For whitespace, we start inspecting the constructed word
-      else {
-        // If we encounter a pointer/reference, we can stop parsing since we're
-        // only parsing expressions.
-        if (IsTypeConstPointerOrRef) {
-          consumeChar();
-          break;
-        }
-        // Parse out integral types
-        if (isIntegral(WordText)) {
-          WordText.push_back(C);
-          consumeChar();
-          if (!lexIntegral(WordText, NextToken))
-            return false;
-        }
-        // Certain types, like qualified names or types with CVR to name a few,
-        // may have whitespace inside of the typename, so we need to check and
-        // continue parsing if that's the case
-        if (continueParsing(WordText)) {
-          WordText.push_back(C);
-          consumeChar();
-        }
-        // Handles cases without qualified names or type qualifiers
-        else {
-          NextToken.clear();
-          peekNextToken(NextToken);
-          // Check for pointer/ref vals, and mark the type as a pointer/ref for
-          // the rest of the lex
-          if (WordText.ends_with(PointerVal) ||
-              WordText.ends_with(ReferenceVal)) {
-            if (NextToken.equals(ConstVal)) {
-              IsTypeConstPointerOrRef = true;
-              WordText.push_back(C);
-              consumeChar();
-            } else {
-              consumeChar();
-              break;
-            }
-          } else {
-            // Check if the next token is a pointer/ref
-            if ((NextToken.ends_with(PointerVal) ||
-                 NextToken.ends_with(ReferenceVal))) {
-              WordText.push_back(C);
-              consumeChar();
-            } else {
-              if (continueParsing(NextToken)) {
-                WordText.push_back(C);
-                consumeChar();
-              } else {
-                consumeChar();
-                break;
-              }
-            }
-          }
-        }
+      } else {
+        consumeChar();
+        break;
       }
     }
 
@@ -462,8 +297,7 @@ class TextTokenRetokenizer {
     memcpy(TextPtr, WordText.c_str(), Length + 1);
     StringRef Text = StringRef(TextPtr, Length);
 
-    formTokenWithChars(Tok, Loc, WordBegin,
-                       Pos.BufferPtr - WordBegin, Text);
+    formTokenWithChars(Tok, Loc, WordBegin, Pos.BufferPtr - WordBegin, Text);
     return true;
   }
 
diff --git a/clang/unittests/AST/CommentParser.cpp b/clang/unittests/AST/CommentParser.cpp
index 385a98e2a036b..8071bb69ab867 100644
--- a/clang/unittests/AST/CommentParser.cpp
+++ b/clang/unittests/AST/CommentParser.cpp
@@ -1459,103 +1459,6 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg1) {
 }
 
 TEST_F(CommentParserTest, ThrowsCommandHasArg2) {
-  const char *Sources[] = {
-      "/// @throws const int This function throws a const integer",
-      ("/// @throws\n"
-       "/// const int This function throws a const integer"),
-      ("/// @throws \n"
-       "/// const int This function throws a const integer"),
-      ("/// @throws\n"
-       "/// const int\n"
-       "/// This function throws a const integer"),
-      ("/// @throws \n"
-       "/// const int \n"
-       "/// This function throws a const integer"),
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "const int");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg3) {
-  const char *Sources[] = {
-      "/// @throws const int * This function throws a pointer to a const "
-      "integer\n",
-      ("/// @throws\n"
-       "/// const int * This function throws a pointer to a const integer"),
-      ("/// @throws \n"
-       "/// const int * This function throws a pointer to a const integer"),
-      ("/// @throws\n"
-       "/// const int *\n"
-       "/// This function throws a pointer to a const integer"),
-      ("/// @throws \n"
-       "/// const int *\n"
-       "/// This function throws a pointer to a const integer"),
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "const int *");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg4) {
-  const char *Sources[] = {
-      "/// @throws const int * const This function throws a const pointer to a "
-      "const integer",
-      ("/// @throws\n"
-       "/// const int * const This function throws a const pointer to a const "
-       "integer"),
-      ("/// @throws \n"
-       "/// const int * const This function throws a const pointer to a const "
-       "integer"),
-      ("/// @throws\n"
-       "/// const int * const\n"
-       "/// This function throws a const pointer to a const integer"),
-      ("/// @throws \n"
-       "/// const int * const\n"
-       "/// This function throws a const pointer to a const integer"),
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "const int * const");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg5) {
   const char *Sources[] = {
       "/// @throws int** This function throws a double pointer to an integer",
   };
@@ -1576,28 +1479,7 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg5) {
   }
 }
 
-TEST_F(CommentParserTest, ThrowsCommandHasArg6) {
-  const char *Sources[] = {
-      "/// @throws const char ** double pointer to a constant char pointer",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "const char **");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg7) {
+TEST_F(CommentParserTest, ThrowsCommandHasArg3) {
   const char *Sources[] = {
       "/// @throws Error<T> error of type Error<T>",
   };
@@ -1619,7 +1501,7 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg7) {
   }
 }
 
-TEST_F(CommentParserTest, ThrowsCommandHasArg8) {
+TEST_F(CommentParserTest, ThrowsCommandHasArg4) {
   const char *Sources[] = {
       "/// @throws Error<Container<T>> nested templates",
   };
@@ -1640,7 +1522,7 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg8) {
   }
 }
 
-TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
+TEST_F(CommentParserTest, ThrowsCommandHasArg5) {
   const char *Sources[] = {
       "/// @throws Error<Ts...> variadic templates",
   };
@@ -1661,134 +1543,6 @@ TEST_F(CommentParserTest, ThrowsCommandHasArg9) {
   }
 }
 
-TEST_F(CommentParserTest, ThrowsCommandHasArg10) {
-  const char *Sources[] = {
-      "/// @throws const std::map<int, std::string> * pointer to a const map",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "const std::map<int, std::string> *");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg11) {
-  const char *Sources[] = {
-      "/// @throws const std :: map<int, std :: string> * pointer to a "
-      "const map with spaces",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) ==
-                  "const std :: map<int, std :: string> *");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg12) {
-  const char *Sources[] = {
-      "/// @throws volatile int a volatile integer",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "volatile int");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg13) {
-  const char *Sources[] = {
-      "/// @throws volatile double * volatile pointer to a double",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "volatile double *");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg14) {
-  const char *Sources[] = {
-      "/// @throws unsigned long at least a 32-bit integer",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "unsigned long");
-    }
-  }
-}
-
-TEST_F(CommentParserTest, ThrowsCommandHasArg15) {
-  const char *Sources[] = {
-      "/// @throws unsigned long long at least a 64-bit integer",
-  };
-
-  for (size_t i = 0, e = std::size(Sources); i != e; i++) {
-    FullComment *FC = parseString(Sources[i]);
-    ASSERT_TRUE(HasChildCount(FC, 2));
-
-    ASSERT_TRUE(HasParagraphCommentAt(FC, 0, " "));
-    {
-      BlockCommandComment *BCC;
-      ParagraphComment *PC;
-      ASSERT_TRUE(HasBlockCommandAt(FC, Traits, 1, BCC, "throws", PC));
-      ASSERT_TRUE(HasChildCount(PC, 1));
-      ASSERT_TRUE(BCC->getNumArgs() == 1);
-      ASSERT_TRUE(BCC->getArgText(0) == "unsigned long long");
-    }
-  }
-}
-
 } // unnamed namespace
 
 } // end namespace comments

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [Clang][Comments] Add argument parsing for @throw @throws @exception (PR #84726)

Reply via email to