[clang-tools-extra] [clang-pseudo] Add a --print-terminal-tokens option (PR #87898)
https://github.com/jeremy-rifkin updated https://github.com/llvm/llvm-project/pull/87898 >From 2ebb15e08b5e2d8a9fe6cfddbe0dd2a8942b2542 Mon Sep 17 00:00:00 2001 From: Jeremy <51220084+jeremy-rif...@users.noreply.github.com> Date: Sat, 6 Apr 2024 17:02:20 -0500 Subject: [PATCH 1/3] Add a --print-terminal-tokens option --- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp| 2 +- .../pseudo/include/clang-pseudo/Forest.h | 11 ++-- clang-tools-extra/pseudo/lib/Forest.cpp | 26 +-- clang-tools-extra/pseudo/tool/ClangPseudo.cpp | 12 +++-- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp index 87b9d15480cc35..33b3da1ed6ea9f 100644 --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -46,7 +46,7 @@ class Fuzzer { glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) - llvm::outs() << Root.dumpRecursive(Lang.G); + llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt); } }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64e..642c489b3fba41 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include +#include +#include namespace clang { namespace pseudo { @@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. 
llvm::iterator_range descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + std::string + dump(const Grammar &, + std::optional>) const; + std::string + dumpRecursive(const Grammar &, +std::optional>, +bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a4..adce731d6c1e1c 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar ) const { +std::string ForestNode::dump( +const Grammar , +std::optional> Code) const { switch (kind()) { case Ambiguous: return llvm::formatv("{0} := ", G.symbolName(symbol())); case Terminal: -return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); +if (Code) { + return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), + startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); +} else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); +} case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar ) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar , - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( +const Grammar , +std::optional> Code, +bool Abbreviated) const { using llvm::formatv; Token::Index MaxToken = 0; // Count visits of nodes so we can mark those seen multiple times. 
@@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar , std::string Result; constexpr Token::Index KEnd = std::numeric_limits::max(); std::function, - LineDecoration )> + LineDecoration LineDec)> Dump = [&](const ForestNode *P, Token::Index End, std::optional ElidedParent, LineDecoration LineDec) { bool SharedNode = VisitCounts.find(P)->getSecond() > 1; @@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar , // The first time, print as #1. Later, =#1. if (First) { -Result += formatv("{0} #{1}", P->dump(G), ID); +Result += formatv("{0} #{1}", P->dump(G, Code), ID); } else { Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); Children = {}; // Don't walk the children again. } } else { - Result.append(P->dump(G)); + Result.append(P->dump(G, Code)); } Result.push_back('\n'); diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index
[clang-tools-extra] [clang-pseudo] Add a --print-terminal-tokens option (PR #87898)
https://github.com/jeremy-rifkin updated https://github.com/llvm/llvm-project/pull/87898 >From 2ebb15e08b5e2d8a9fe6cfddbe0dd2a8942b2542 Mon Sep 17 00:00:00 2001 From: Jeremy <51220084+jeremy-rif...@users.noreply.github.com> Date: Sat, 6 Apr 2024 17:02:20 -0500 Subject: [PATCH 1/2] Add a --print-terminal-tokens option --- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp| 2 +- .../pseudo/include/clang-pseudo/Forest.h | 11 ++-- clang-tools-extra/pseudo/lib/Forest.cpp | 26 +-- clang-tools-extra/pseudo/tool/ClangPseudo.cpp | 12 +++-- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp index 87b9d15480cc35..33b3da1ed6ea9f 100644 --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -46,7 +46,7 @@ class Fuzzer { glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) - llvm::outs() << Root.dumpRecursive(Lang.G); + llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt); } }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64e..642c489b3fba41 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include +#include +#include namespace clang { namespace pseudo { @@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. 
llvm::iterator_range descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + std::string + dump(const Grammar &, + std::optional>) const; + std::string + dumpRecursive(const Grammar &, +std::optional>, +bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a4..adce731d6c1e1c 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar ) const { +std::string ForestNode::dump( +const Grammar , +std::optional> Code) const { switch (kind()) { case Ambiguous: return llvm::formatv("{0} := ", G.symbolName(symbol())); case Terminal: -return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); +if (Code) { + return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), + startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); +} else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); +} case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar ) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar , - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( +const Grammar , +std::optional> Code, +bool Abbreviated) const { using llvm::formatv; Token::Index MaxToken = 0; // Count visits of nodes so we can mark those seen multiple times. 
@@ -95,7 +105,7 @@ std::string ForestNode::dumpRecursive(const Grammar , std::string Result; constexpr Token::Index KEnd = std::numeric_limits::max(); std::function, - LineDecoration )> + LineDecoration LineDec)> Dump = [&](const ForestNode *P, Token::Index End, std::optional ElidedParent, LineDecoration LineDec) { bool SharedNode = VisitCounts.find(P)->getSecond() > 1; @@ -145,13 +155,13 @@ std::string ForestNode::dumpRecursive(const Grammar , // The first time, print as #1. Later, =#1. if (First) { -Result += formatv("{0} #{1}", P->dump(G), ID); +Result += formatv("{0} #{1}", P->dump(G, Code), ID); } else { Result += formatv("{0} =#{1}", G.symbolName(P->symbol()), ID); Children = {}; // Don't walk the children again. } } else { - Result.append(P->dump(G)); + Result.append(P->dump(G, Code)); } Result.push_back('\n'); diff --git a/clang-tools-extra/pseudo/tool/ClangPseudo.cpp b/clang-tools-extra/pseudo/tool/ClangPseudo.cpp index
[clang-tools-extra] [clang-pseudo] Add a --print-terminal-tokens option (PR #87898)
llvmbot wrote: @llvm/pr-subscribers-clang-tools-extra Author: Jeremy Rifkin (jeremy-rifkin) Changes This PR adds a `--print-terminal-tokens` option to clang-pseudo which prints tokens in a parse forest in addition to providing the token index: ``` › bin/clang-pseudo --source test.cpp --print-forest [ 0, end) translation-unit~simple-declaration := decl-specifier-seq init-declarator-list ; [ 0, 1) ├─decl-specifier-seq~simple-type-specifier := <ambiguous> [ 0, 1) │ ├─simple-type-specifier~IDENTIFIER := tok[0] [ 0, 1) │ └─simple-type-specifier~IDENTIFIER := tok[0] [ 1, 3) ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator [ 1, 2) │ ├─ptr-operator~* := tok[1] [ 2, 3) │ └─ptr-declarator~IDENTIFIER := tok[2] [ 3, end) └─; := tok[3] ``` ``` › bin/clang-pseudo --source test.cpp --print-forest --print-terminal-tokens [ 0, end) translation-unit~simple-declaration := decl-specifier-seq init-declarator-list ; [ 0, 1) ├─decl-specifier-seq~simple-type-specifier := <ambiguous> [ 0, 1) │ ├─simple-type-specifier~IDENTIFIER := tok[0] (identifier 1:0 "T" flags=1) [ 0, 1) │ └─simple-type-specifier~IDENTIFIER := tok[0] (identifier 1:0 "T" flags=1) [ 1, 3) ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator [ 1, 2) │ ├─ptr-operator~* := tok[1] (star 1:0 "*") [ 2, 3) │ └─ptr-declarator~IDENTIFIER := tok[2] (identifier 1:0 "y") [ 3, end) └─; := tok[3] (semi 1:0 ";") ``` --- Full diff: https://github.com/llvm/llvm-project/pull/87898.diff 4 Files Affected: - (modified) clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp (+1-1) - (modified) clang-tools-extra/pseudo/include/clang-pseudo/Forest.h (+9-2) - (modified) clang-tools-extra/pseudo/lib/Forest.cpp (+18-8) - (modified) clang-tools-extra/pseudo/tool/ClangPseudo.cpp (+10-2) ```diff diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp index 87b9d15480cc35..33b3da1ed6ea9f 100644 --- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ 
b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -46,7 +46,7 @@ class Fuzzer { glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) - llvm::outs() << Root.dumpRecursive(Lang.G); + llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt); } }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64e..642c489b3fba41 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include +#include +#include namespace clang { namespace pseudo { @@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. llvm::iterator_range descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + std::string + dump(const Grammar &, + std::optional>) const; + std::string + dumpRecursive(const Grammar &, +std::optional>, +bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a4..adce731d6c1e1c 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar ) const { +std::string ForestNode::dump( +const Grammar , +std::optional> Code) const { switch (kind()) { case Ambiguous: return llvm::formatv("{0} := ", G.symbolName(symbol())); case Terminal: -return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); +if (Code) { + return llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), 
+ startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); +} else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); +} case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar ) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar , - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( +const Grammar , +std::optional> Code, +bool Abbreviated) const { using llvm::formatv; Token::Index MaxToken = 0; // Count visits of nodes so we can mark
[clang-tools-extra] [clang-pseudo] Add a --print-terminal-tokens option (PR #87898)
github-actions[bot] wrote: Thank you for submitting a Pull Request (PR) to the LLVM Project! This PR will be automatically labeled and the relevant teams will be notified. If you wish to, you can add reviewers by using the "Reviewers" section on this page. If this is not working for you, it is probably because you do not have write permissions for the repository; in that case, you can instead tag reviewers by name in a comment by using `@` followed by their GitHub username. If you have received no comments on your PR for a week, you can request a review by "pinging" the PR, that is, by adding a comment that says "Ping". The common courtesy "ping" rate is once a week. Please remember that you are asking for valuable time from other developers. If you have further questions, they may be answered by the [LLVM GitHub User Guide](https://llvm.org/docs/GitHub.html). You can also ask questions in a comment on this PR, on the [LLVM Discord](https://discord.com/invite/xS7Z362) or on the [forums](https://discourse.llvm.org/). https://github.com/llvm/llvm-project/pull/87898 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang-tools-extra] [clang-pseudo] Add a --print-terminal-tokens option (PR #87898)
https://github.com/jeremy-rifkin created https://github.com/llvm/llvm-project/pull/87898 This PR adds a `--print-terminal-tokens` option to clang-pseudo which, when printing a parse forest, prints the token itself next to each terminal node in addition to the token index: ``` › bin/clang-pseudo --source test.cpp --print-forest [ 0, end) translation-unit~simple-declaration := decl-specifier-seq init-declarator-list ; [ 0, 1) ├─decl-specifier-seq~simple-type-specifier := <ambiguous> [ 0, 1) │ ├─simple-type-specifier~IDENTIFIER := tok[0] [ 0, 1) │ └─simple-type-specifier~IDENTIFIER := tok[0] [ 1, 3) ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator [ 1, 2) │ ├─ptr-operator~* := tok[1] [ 2, 3) │ └─ptr-declarator~IDENTIFIER := tok[2] [ 3, end) └─; := tok[3] ``` ``` › bin/clang-pseudo --source test.cpp --print-forest --print-terminal-tokens [ 0, end) translation-unit~simple-declaration := decl-specifier-seq init-declarator-list ; [ 0, 1) ├─decl-specifier-seq~simple-type-specifier := <ambiguous> [ 0, 1) │ ├─simple-type-specifier~IDENTIFIER := tok[0] (identifier 1:0 "T" flags=1) [ 0, 1) │ └─simple-type-specifier~IDENTIFIER := tok[0] (identifier 1:0 "T" flags=1) [ 1, 3) ├─init-declarator-list~ptr-declarator := ptr-operator ptr-declarator [ 1, 2) │ ├─ptr-operator~* := tok[1] (star 1:0 "*") [ 2, 3) │ └─ptr-declarator~IDENTIFIER := tok[2] (identifier 1:0 "y") [ 3, end) └─; := tok[3] (semi 1:0 ";") ``` >From 2ebb15e08b5e2d8a9fe6cfddbe0dd2a8942b2542 Mon Sep 17 00:00:00 2001 From: Jeremy <51220084+jeremy-rif...@users.noreply.github.com> Date: Sat, 6 Apr 2024 17:02:20 -0500 Subject: [PATCH] Add a --print-terminal-tokens option --- clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp| 2 +- .../pseudo/include/clang-pseudo/Forest.h | 11 ++-- clang-tools-extra/pseudo/lib/Forest.cpp | 26 +-- clang-tools-extra/pseudo/tool/ClangPseudo.cpp | 12 +++-- 4 files changed, 38 insertions(+), 13 deletions(-) diff --git a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp index 87b9d15480cc35..33b3da1ed6ea9f 100644 
--- a/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp +++ b/clang-tools-extra/pseudo/fuzzer/Fuzzer.cpp @@ -46,7 +46,7 @@ class Fuzzer { glrParse(clang::pseudo::ParseParams{ParseableStream, Arena, GSS}, *Lang.G.findNonterminal("translation-unit"), Lang); if (Print) - llvm::outs() << Root.dumpRecursive(Lang.G); + llvm::outs() << Root.dumpRecursive(Lang.G, std::nullopt); } }; diff --git a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h index e9edb40e02b64e..642c489b3fba41 100644 --- a/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h +++ b/clang-tools-extra/pseudo/include/clang-pseudo/Forest.h @@ -26,6 +26,8 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Allocator.h" #include +#include +#include namespace clang { namespace pseudo { @@ -112,8 +114,13 @@ class alignas(class ForestNode *) ForestNode { // Iteration over all nodes in the forest, including this. llvm::iterator_range descendants() const; - std::string dump(const Grammar &) const; - std::string dumpRecursive(const Grammar &, bool Abbreviated = false) const; + std::string + dump(const Grammar &, + std::optional>) const; + std::string + dumpRecursive(const Grammar &, +std::optional>, +bool Abbreviated = false) const; private: friend class ForestArena; diff --git a/clang-tools-extra/pseudo/lib/Forest.cpp b/clang-tools-extra/pseudo/lib/Forest.cpp index e8e60e5ec475a4..adce731d6c1e1c 100644 --- a/clang-tools-extra/pseudo/lib/Forest.cpp +++ b/clang-tools-extra/pseudo/lib/Forest.cpp @@ -45,13 +45,21 @@ ForestNode::descendants() const { return {RecursiveIterator(this), RecursiveIterator()}; } -std::string ForestNode::dump(const Grammar ) const { +std::string ForestNode::dump( +const Grammar , +std::optional> Code) const { switch (kind()) { case Ambiguous: return llvm::formatv("{0} := ", G.symbolName(symbol())); case Terminal: -return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), - startTokenIndex()); +if (Code) { + return 
llvm::formatv("{0} := tok[{1}] ({2})", G.symbolName(symbol()), + startTokenIndex(), + Code->get().tokens()[startTokenIndex()]); +} else { + return llvm::formatv("{0} := tok[{1}]", G.symbolName(symbol()), + startTokenIndex()); +} case Sequence: return G.dumpRule(rule()); case Opaque: @@ -60,8 +68,10 @@ std::string ForestNode::dump(const Grammar ) const { llvm_unreachable("Unhandled node kind!"); } -std::string ForestNode::dumpRecursive(const Grammar , - bool Abbreviated) const { +std::string ForestNode::dumpRecursive( +const Grammar , +std::optional> Code, +bool