https://github.com/hbatagelo updated https://github.com/llvm/llvm-project/pull/196861
>From a740c3c7b75ef1c00e4ae974be4a3227e0b5d1ec Mon Sep 17 00:00:00 2001 From: Harlen Batagelo <[email protected]> Date: Sun, 10 May 2026 21:49:41 -0300 Subject: [PATCH 1/2] Synthesize missing eof token --- clang/lib/Tooling/Syntax/Tokens.cpp | 10 +++++ clang/unittests/Tooling/Syntax/TokensTest.cpp | 42 +++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 260654a0701fd..e6ca70e0cfb29 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -712,6 +712,16 @@ class TokenCollector::Builder { TokenBuffer build() && { assert(!Result.ExpandedTokens.empty()); + + // When the parser hits a hard limit (e.g. bracket depth or function scope + // depth), it halts prematurely and leaves the expanded token stream + // truncated with no final `eof` token. To keep the invariant, synthesize an + // `eof` at the location of the last collected token. + if (Result.ExpandedTokens.back().kind() != tok::eof) { + SourceLocation Loc = Result.ExpandedTokens.back().location(); + Result.ExpandedTokens.emplace_back(Loc, 0, tok::eof); + } + assert(Result.ExpandedTokens.back().kind() == tok::eof); // Tokenize every file that contributed tokens to the expanded stream. diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 468ca5ddd2c75..ae84bda5b228b 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -92,7 +92,8 @@ class TokenCollectorTest : public ::testing::Test { /// Run the clang frontend, collect the preprocessed tokens from the frontend /// invocation and store them in this->Buffer. /// This also clears SourceManager before running the compiler. - void recordTokens(llvm::StringRef Code) { + void recordTokens(llvm::StringRef Code, + llvm::ArrayRef<const char *> ExtraArgs = {}) { class RecordTokens : public ASTFrontendAction { public: explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} @@ -123,8 +124,10 @@ class TokenCollectorTest : public ::testing::Test { // Prepare to run a compiler. if (!Diags->getClient()) Diags->setClient(new IgnoringDiagConsumer); - std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", - FileName}; + std::vector<const char *> Args = {"tok-test", "-std=c++03", + "-fsyntax-only"}; + Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); + Args.push_back(FileName); CreateInvocationOptions CIOpts; CIOpts.Diags = Diags; CIOpts.VFS = FS; @@ -1148,4 +1151,37 @@ TEST_F(TokenCollectorTest, Pragmas) { } )cpp"); } + +TEST_F(TokenBufferTest, EofTokenOnFunctionScopeDepthLimit) { + static_assert(ParmVarDecl::getMaxFunctionScopeDepth() == 127, + "Test input relies on a max depth of 127"); + + // Force parser to bail out due to exceeding the function scope depth limit. + // https://github.com/llvm/llvm-project/issues/196244 + recordTokens(R"cpp( + #define L [](int= + #define L4 L L L L + #define L16 L4 L4 L4 L4 + #define L64 L16 L16 L16 L16 + + void foo() { + L64 L64 L + } + )cpp"); + + ASSERT_GE(Buffer.expandedTokens().size(), 2u); + // The stream is truncated but ends with an `eof`. + EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); + EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::kw_int); +} + +TEST_F(TokenBufferTest, EofTokenOnBracketDepthLimit) { + // Force parser to bail out due to exceeding the bracket depth limit. + recordTokens("((;", {"-fbracket-depth=1"}); + + ASSERT_GE(Buffer.expandedTokens().size(), 2u); + // The stream is truncated but ends with an `eof`. + EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); + EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::l_paren); +} } // namespace >From 81e668b1ec63858884237789342604f55d4d76de Mon Sep 17 00:00:00 2001 From: Harlen Batagelo <[email protected]> Date: Mon, 11 May 2026 10:30:10 -0300 Subject: [PATCH 2/2] Remove redundant test case and assertion --- clang/lib/Tooling/Syntax/Tokens.cpp | 2 -- clang/unittests/Tooling/Syntax/TokensTest.cpp | 23 ------------------- 2 files changed, 25 deletions(-) diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index e6ca70e0cfb29..9ad8a149675d9 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -722,8 +722,6 @@ class TokenCollector::Builder { Result.ExpandedTokens.emplace_back(Loc, 0, tok::eof); } - assert(Result.ExpandedTokens.back().kind() == tok::eof); - // Tokenize every file that contributed tokens to the expanded stream. buildSpelledTokens(); diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index ae84bda5b228b..8af9308828c28 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -1152,29 +1152,6 @@ TEST_F(TokenCollectorTest, Pragmas) { )cpp"); } -TEST_F(TokenBufferTest, EofTokenOnFunctionScopeDepthLimit) { - static_assert(ParmVarDecl::getMaxFunctionScopeDepth() == 127, - "Test input relies on a max depth of 127"); - - // Force parser to bail out due to exceeding the function scope depth limit. - // https://github.com/llvm/llvm-project/issues/196244 - recordTokens(R"cpp( - #define L [](int= - #define L4 L L L L - #define L16 L4 L4 L4 L4 - #define L64 L16 L16 L16 L16 - - void foo() { - L64 L64 L - } - )cpp"); - - ASSERT_GE(Buffer.expandedTokens().size(), 2u); - // The stream is truncated but ends with an `eof`. - EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); - EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::kw_int); -} - TEST_F(TokenBufferTest, EofTokenOnBracketDepthLimit) { // Force parser to bail out due to exceeding the bracket depth limit. recordTokens("((;", {"-fbracket-depth=1"}); _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
