Author: yronglin Date: 2025-06-25T00:56:01+08:00 New Revision: 8b0d112478cec296517660c1c741b8e97aeaf684
URL: https://github.com/llvm/llvm-project/commit/8b0d112478cec296517660c1c741b8e97aeaf684 DIFF: https://github.com/llvm/llvm-project/commit/8b0d112478cec296517660c1c741b8e97aeaf684.diff LOG: [Clang][Preprocessor] Expand UCNs in macro concatenation (#145351) Fixes https://github.com/llvm/llvm-project/issues/145240. A UCN in a preprocessor-pasted identifier was not resolved to its Unicode character, which could cause the following issue: ```c #define CAT(a,b) a##b char foo\u00b5; char*p = &CAT(foo, \u00b5); // error: use of undeclared identifier 'foo\u00b5' ``` The real identifier after pasting is `fooµ`. This PR fixes the issue in `TokenLexer::pasteTokens`: if any of the pasted tokens contains a UCN, the final pasted token is marked with the `Token::HasUCN` flag. Then `Preprocessor::LookUpIdentifierInfo` will expand the UCNs in this token. Signed-off-by: yronglin <yronglin...@gmail.com> Added: clang/test/Preprocessor/macro_paste_identifier_ucn.c Modified: clang/docs/ReleaseNotes.rst clang/lib/Lex/TokenLexer.cpp Removed: ################################################################################ diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 89d86c3371247..2b32f80df596b 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -722,6 +722,7 @@ Bug Fixes in This Version - Fixed incorrect token location when emitting diagnostics for tokens expanded from macros. (#GH143216) - Fixed an infinite recursion when checking constexpr destructors. (#GH141789) - Fixed a crash when a malformed using declaration appears in a ``constexpr`` function. (#GH144264) +- Fixed a bug when use unicode character name in macro concatenation. 
(#GH145240) Bug Fixes to Compiler Builtins ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/clang/lib/Lex/TokenLexer.cpp b/clang/lib/Lex/TokenLexer.cpp index fbb8c4262d6da..47f4134fb1465 100644 --- a/clang/lib/Lex/TokenLexer.cpp +++ b/clang/lib/Lex/TokenLexer.cpp @@ -748,6 +748,7 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, const char *ResultTokStrPtr = nullptr; SourceLocation StartLoc = LHSTok.getLocation(); SourceLocation PasteOpLoc; + bool HasUCNs = false; auto IsAtEnd = [&TokenStream, &CurIdx] { return TokenStream.size() == CurIdx; @@ -885,6 +886,9 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, // Finally, replace LHS with the result, consume the RHS, and iterate. ++CurIdx; + + // Set Token::HasUCN flag if LHS or RHS contains any UCNs. + HasUCNs = LHSTok.hasUCN() || RHS.hasUCN() || HasUCNs; LHSTok = Result; } while (!IsAtEnd() && TokenStream[CurIdx].is(tok::hashhash)); @@ -913,6 +917,13 @@ bool TokenLexer::pasteTokens(Token &LHSTok, ArrayRef<Token> TokenStream, // token pasting re-lexes the result token in raw mode, identifier information // isn't looked up. As such, if the result is an identifier, look up id info. if (LHSTok.is(tok::raw_identifier)) { + + // If there has any UNCs in concated token, we should mark this token + // with Token::HasUCN flag, then LookUpIdentifierInfo will expand UCNs in + // token. + if (HasUCNs) + LHSTok.setFlag(Token::HasUCN); + // Look up the identifier info for the token. We disabled identifier lookup // by saying we're skipping contents, so we need to do this manually. 
PP.LookUpIdentifierInfo(LHSTok); diff --git a/clang/test/Preprocessor/macro_paste_identifier_ucn.c b/clang/test/Preprocessor/macro_paste_identifier_ucn.c new file mode 100644 index 0000000000000..c9eb8190edfe8 --- /dev/null +++ b/clang/test/Preprocessor/macro_paste_identifier_ucn.c @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -fms-extensions %s -verify +// RUN: %clang_cc1 -E -fms-extensions %s | FileCheck %s +// expected-no-diagnostics + +#define CAT(a,b) a##b + +char foo\u00b5; +char*p = &CAT(foo, \u00b5); +// CHECK: char fooµ; +// CHECK-NEXT: char*p = &fooµ; _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits