https://github.com/yronglin created https://github.com/llvm/llvm-project/pull/143898
This PR introduces a new function `peekNextPPToken`. It's an extension of `isNextPPTokenLParen` and can look ahead one token in the preprocessor without side-effects. It's also the 1st part of https://github.com/llvm/llvm-project/pull/107168, where it is used to look ahead at the next token and then determine whether the pp directive currently being lexed is a pp-import or pp-module directive. At the start of phase 4 an import or module token is treated as starting a directive and is converted to its respective keyword iff: - After skipping horizontal whitespace, it is - at the start of a logical line, or - preceded by an export at the start of the logical line. - It is followed by an identifier pp token (before macro expansion), or - <, ", or : (but not ::) pp tokens for import, or - ; for module. Otherwise the token is treated as an identifier. From 373daed324ca99f0b327c424c64a101f7d0f99a3 Mon Sep 17 00:00:00 2001 From: yronglin <yronglin...@gmail.com> Date: Thu, 12 Jun 2025 21:46:13 +0800 Subject: [PATCH] [Clang] Add peekNextPPToken, makes peek next token without side-effects Signed-off-by: yronglin <yronglin...@gmail.com> --- clang/include/clang/Lex/Lexer.h | 10 ++++---- clang/include/clang/Lex/Preprocessor.h | 8 ++++++- clang/include/clang/Lex/TokenLexer.h | 7 +++--- clang/lib/Lex/Lexer.cpp | 21 +++++++++-------- clang/lib/Lex/PPMacroExpansion.cpp | 32 ++++++++++++-------------- clang/lib/Lex/TokenLexer.cpp | 10 ++++---- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/clang/include/clang/Lex/Lexer.h b/clang/include/clang/Lex/Lexer.h index bb65ae010cffa..a595cda1eaa77 100644 --- a/clang/include/clang/Lex/Lexer.h +++ b/clang/include/clang/Lex/Lexer.h @@ -124,7 +124,7 @@ class Lexer : public PreprocessorLexer { //===--------------------------------------------------------------------===// // Context that changes as the file is lexed. // NOTE: any state that mutates when in raw mode must have save/restore code - // in Lexer::isNextPPTokenLParen. 
+ // in Lexer::peekNextPPToken. // BufferPtr - Current pointer into the buffer. This is the next character // to be lexed. @@ -642,10 +642,10 @@ class Lexer : public PreprocessorLexer { BufferPtr = TokEnd; } - /// isNextPPTokenLParen - Return 1 if the next unexpanded token will return a - /// tok::l_paren token, 0 if it is something else and 2 if there are no more - /// tokens in the buffer controlled by this lexer. - unsigned isNextPPTokenLParen(); + /// peekNextPPToken - Return std::nullopt if there are no more tokens in the + /// buffer controlled by this lexer, otherwise return the next unexpanded + /// token. + std::optional<Token> peekNextPPToken(); //===--------------------------------------------------------------------===// // Lexer character reading interfaces. diff --git a/clang/include/clang/Lex/Preprocessor.h b/clang/include/clang/Lex/Preprocessor.h index 78be2bd64d61c..4fe7a79393afc 100644 --- a/clang/include/clang/Lex/Preprocessor.h +++ b/clang/include/clang/Lex/Preprocessor.h @@ -2288,7 +2288,9 @@ class Preprocessor { /// Determine whether the next preprocessor token to be /// lexed is a '('. If so, consume the token and return true, if not, this /// method should have no observable side-effect on the lexed tokens. - bool isNextPPTokenLParen(); + bool isNextPPTokenLParen() { + return peekNextPPToken().value_or(Token{}).is(tok::l_paren); + } private: /// Identifiers used for SEH handling in Borland. These are only @@ -2667,6 +2669,10 @@ class Preprocessor { void removeCachedMacroExpandedTokensOfLastLexer(); + /// Peek the next token. If so, return the token, if not, this + /// method should have no observable side-effect on the lexed tokens. + std::optional<Token> peekNextPPToken(); + /// After reading "MACRO(", this method is invoked to read all of the formal /// arguments specified for the macro invocation. Returns null on error. 
MacroArgs *ReadMacroCallArgumentList(Token &MacroName, MacroInfo *MI, diff --git a/clang/include/clang/Lex/TokenLexer.h b/clang/include/clang/Lex/TokenLexer.h index 4d229ae610674..777b4e6266c71 100644 --- a/clang/include/clang/Lex/TokenLexer.h +++ b/clang/include/clang/Lex/TokenLexer.h @@ -139,10 +139,9 @@ class TokenLexer { void Init(const Token *TokArray, unsigned NumToks, bool DisableMacroExpansion, bool OwnsTokens, bool IsReinject); - /// If the next token lexed will pop this macro off the - /// expansion stack, return 2. If the next unexpanded token is a '(', return - /// 1, otherwise return 0. - unsigned isNextTokenLParen() const; + /// If the next token lexed will pop this macro off the expansion stack, + /// return std::nullopt, otherwise return the next unexpanded token. + std::optional<Token> peekNextPPToken() const; /// Lex and return a token from this macro stream. bool Lex(Token &Tok); diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp index 93200458f04b4..8e977eac8c983 100644 --- a/clang/lib/Lex/Lexer.cpp +++ b/clang/lib/Lex/Lexer.cpp @@ -3200,18 +3200,19 @@ bool Lexer::LexEndOfFile(Token &Result, const char *CurPtr) { return PP->HandleEndOfFile(Result, isPragmaLexer()); } -/// isNextPPTokenLParen - Return 1 if the next unexpanded token lexed from -/// the specified lexer will return a tok::l_paren token, 0 if it is something -/// else and 2 if there are no more tokens in the buffer controlled by the -/// lexer. -unsigned Lexer::isNextPPTokenLParen() { +/// peekNextPPToken - Return std::nullopt if there are no more tokens in the +/// buffer controlled by this lexer, otherwise return the next unexpanded +/// token. 
+std::optional<Token> Lexer::peekNextPPToken() { assert(!LexingRawMode && "How can we expand a macro from a skipping buffer?"); if (isDependencyDirectivesLexer()) { if (NextDepDirectiveTokenIndex == DepDirectives.front().Tokens.size()) - return 2; - return DepDirectives.front().Tokens[NextDepDirectiveTokenIndex].is( - tok::l_paren); + return std::nullopt; + Token Result; + (void)convertDependencyDirectiveToken( + DepDirectives.front().Tokens[NextDepDirectiveTokenIndex], Result); + return Result; } // Switch to 'skipping' mode. This will ensure that we can lex a token @@ -3240,8 +3241,8 @@ unsigned Lexer::isNextPPTokenLParen() { LexingRawMode = false; if (Tok.is(tok::eof)) - return 2; - return Tok.is(tok::l_paren); + return std::nullopt; + return Tok; } /// Find the end of a version control conflict marker. diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index 37ac1bf07e9c0..585990f60c98a 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -418,42 +418,40 @@ static bool isTrivialSingleTokenExpansion(const MacroInfo *MI, return !llvm::is_contained(MI->params(), II); } -/// isNextPPTokenLParen - Determine whether the next preprocessor token to be -/// lexed is a '('. If so, consume the token and return true, if not, this -/// method should have no observable side-effect on the lexed tokens. -bool Preprocessor::isNextPPTokenLParen() { +/// isNextPPTokenLParen - Peek the next token. If so, return the token, if not, +/// this method should have no observable side-effect on the lexed tokens. +std::optional<Token> Preprocessor::peekNextPPToken() { // Do some quick tests for rejection cases. - unsigned Val; + std::optional<Token> Val; if (CurLexer) - Val = CurLexer->isNextPPTokenLParen(); + Val = CurLexer->peekNextPPToken(); else - Val = CurTokenLexer->isNextTokenLParen(); + Val = CurTokenLexer->peekNextPPToken(); - if (Val == 2) { + if (!Val) { // We have run off the end. 
If it's a source file we don't // examine enclosing ones (C99 5.1.1.2p4). Otherwise walk up the // macro stack. if (CurPPLexer) - return false; + return std::nullopt; for (const IncludeStackInfo &Entry : llvm::reverse(IncludeMacroStack)) { if (Entry.TheLexer) - Val = Entry.TheLexer->isNextPPTokenLParen(); + Val = Entry.TheLexer->peekNextPPToken(); else - Val = Entry.TheTokenLexer->isNextTokenLParen(); + Val = Entry.TheTokenLexer->peekNextPPToken(); - if (Val != 2) + if (Val) break; // Ran off the end of a source file? if (Entry.ThePPLexer) - return false; + return std::nullopt; } } - // Okay, if we know that the token is a '(', lex it and return. Otherwise we - // have found something that isn't a '(' or we found the end of the - // translation unit. In either case, return false. - return Val == 1; + // Okay, we found the token and return. Otherwise we found the end of the + // translation unit. + return Val; } /// HandleMacroExpandedIdentifier - If an identifier token is read that is to be diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index 6e93416e01c0c..fbb8c4262d6da 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -921,13 +921,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, } /// isNextTokenLParen - If the next token lexed will pop this macro off the -/// expansion stack, return 2. If the next unexpanded token is a '(', return -/// 1, otherwise return 0. -unsigned TokenLexer::isNextTokenLParen() const { +/// expansion stack, return std::nullopt, otherwise return the next unexpanded +/// token. +std::optional<Token> TokenLexer::peekNextPPToken() const { // Out of tokens? 
if (isAtEnd()) - return 2; - return Tokens[CurTokenIdx].is(tok::l_paren); + return std::nullopt; + return Tokens[CurTokenIdx]; } /// isParsingPreprocessorDirective - Return true if we are in the middle of a _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits