https://github.com/hbatagelo created https://github.com/llvm/llvm-project/pull/196861
Fixes #196244. This PR addresses cases where this assertion is triggered in `TokenCollector::Builder::build()`: https://github.com/llvm/llvm-project/blob/dff356d47cfc4413f78c858dd8339cb1c9fca255/clang/lib/Tooling/Syntax/Tokens.cpp#L715 `TokenCollector` collects the expanded token stream by registering a token watcher callback in the preprocessor. Normally, the preprocessor calls the callback for every token up to and including the `tok::eof` token. However, when the parser hits a hard limit such as exceeding the maximum function scope depth (this is the case covered by #196244) or exceeding the bracket depth limit, it bails out via `Parser::cutOffParsing()`. `cutOffParsing` forces the current token to `eof`, but the token watcher callback is never called for it. The result is a truncated token stream. Fix by checking if `ExpandedTokens` is missing the final `tok::eof`. If so, synthesize one at the location of the last collected token. Includes tests to cover both the function scope depth and bracket depth limits. The fixture was extended to allow extra args (the bracket depth test uses `-fbracket-depth=1`). From a740c3c7b75ef1c00e4ae974be4a3227e0b5d1ec Mon Sep 17 00:00:00 2001 From: Harlen Batagelo <[email protected]> Date: Sun, 10 May 2026 21:49:41 -0300 Subject: [PATCH] Synthesize missing eof token --- clang/lib/Tooling/Syntax/Tokens.cpp | 10 +++++ clang/unittests/Tooling/Syntax/TokensTest.cpp | 42 +++++++++++++++++-- 2 files changed, 49 insertions(+), 3 deletions(-) diff --git a/clang/lib/Tooling/Syntax/Tokens.cpp b/clang/lib/Tooling/Syntax/Tokens.cpp index 260654a0701fd..e6ca70e0cfb29 100644 --- a/clang/lib/Tooling/Syntax/Tokens.cpp +++ b/clang/lib/Tooling/Syntax/Tokens.cpp @@ -712,6 +712,16 @@ class TokenCollector::Builder { TokenBuffer build() && { assert(!Result.ExpandedTokens.empty()); + + // When the parser hits a hard limit (e.g. 
bracket depth or function scope + // depth), it halts prematurely and leaves the expanded token stream + // truncated with no final `eof` token. To keep the invariant, synthesize an + // `eof` at the location of the last collected token. + if (Result.ExpandedTokens.back().kind() != tok::eof) { + SourceLocation Loc = Result.ExpandedTokens.back().location(); + Result.ExpandedTokens.emplace_back(Loc, 0, tok::eof); + } + assert(Result.ExpandedTokens.back().kind() == tok::eof); // Tokenize every file that contributed tokens to the expanded stream. diff --git a/clang/unittests/Tooling/Syntax/TokensTest.cpp b/clang/unittests/Tooling/Syntax/TokensTest.cpp index 468ca5ddd2c75..ae84bda5b228b 100644 --- a/clang/unittests/Tooling/Syntax/TokensTest.cpp +++ b/clang/unittests/Tooling/Syntax/TokensTest.cpp @@ -92,7 +92,8 @@ class TokenCollectorTest : public ::testing::Test { /// Run the clang frontend, collect the preprocessed tokens from the frontend /// invocation and store them in this->Buffer. /// This also clears SourceManager before running the compiler. - void recordTokens(llvm::StringRef Code) { + void recordTokens(llvm::StringRef Code, + llvm::ArrayRef<const char *> ExtraArgs = {}) { class RecordTokens : public ASTFrontendAction { public: explicit RecordTokens(TokenBuffer &Result) : Result(Result) {} @@ -123,8 +124,10 @@ class TokenCollectorTest : public ::testing::Test { // Prepare to run a compiler. 
if (!Diags->getClient()) Diags->setClient(new IgnoringDiagConsumer); - std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only", - FileName}; + std::vector<const char *> Args = {"tok-test", "-std=c++03", + "-fsyntax-only"}; + Args.insert(Args.end(), ExtraArgs.begin(), ExtraArgs.end()); + Args.push_back(FileName); CreateInvocationOptions CIOpts; CIOpts.Diags = Diags; CIOpts.VFS = FS; @@ -1148,4 +1151,37 @@ TEST_F(TokenCollectorTest, Pragmas) { } )cpp"); } + +TEST_F(TokenBufferTest, EofTokenOnFunctionScopeDepthLimit) { + static_assert(ParmVarDecl::getMaxFunctionScopeDepth() == 127, + "Test input relies on a max depth of 127"); + + // Force parser to bail out due to exceeding the function scope depth limit. + // https://github.com/llvm/llvm-project/issues/196244 + recordTokens(R"cpp( + #define L [](int= + #define L4 L L L L + #define L16 L4 L4 L4 L4 + #define L64 L16 L16 L16 L16 + + void foo() { + L64 L64 L + } + )cpp"); + + ASSERT_GE(Buffer.expandedTokens().size(), 2u); + // The stream is truncated but ends with an `eof`. + EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); + EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::kw_int); +} + +TEST_F(TokenBufferTest, EofTokenOnBracketDepthLimit) { + // Force parser to bail out due to exceeding the bracket depth limit. + recordTokens("((;", {"-fbracket-depth=1"}); + + ASSERT_GE(Buffer.expandedTokens().size(), 2u); + // The stream is truncated but ends with an `eof`. + EXPECT_EQ(Buffer.expandedTokens().back().kind(), tok::eof); + EXPECT_EQ(Buffer.expandedTokens().drop_back().back().kind(), tok::l_paren); +} } // namespace _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
