curdeius created this revision. curdeius added reviewers: MyDeveloperDay, HazardyKnusperkeks, owenpan. curdeius requested review of this revision. Herald added a project: clang. Herald added a subscriber: cfe-commits.
Fixes https://github.com/llvm/llvm-project/issues/31592. This commit enables lexing of digraphs in C++11 and onwards. Enabling them in C++03 is error-prone, as it would unconditionally treat sequences like "<:" as digraphs, even if they are followed by a single colon, e.g. "<::" would be treated as "[:" instead of "<" followed by "::". Lexing in C++11 doesn't have this problem as it looks ahead at the following characters. The relevant excerpt from Lexer::LexTokenInternal: // C++0x [lex.pptoken]p3: // Otherwise, if the next three characters are <:: and the subsequent // character is neither : nor >, the < is treated as a preprocessor // token by itself and not as the first character of the alternative // token <:. Also, note that both clang and gcc turn on digraphs by default (-fdigraphs), so clang-format should match this behaviour. Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D118706 Files: clang/lib/Format/Format.cpp clang/unittests/Format/FormatTest.cpp Index: clang/unittests/Format/FormatTest.cpp =================================================================== --- clang/unittests/Format/FormatTest.cpp +++ clang/unittests/Format/FormatTest.cpp @@ -24219,6 +24219,16 @@ Style); } +TEST_F(FormatTest, UnderstandsDigraphs) { + verifyFormat("int arr<:5:> = {};"); + verifyFormat("int arr[5] = <%%>;"); + verifyFormat("int arr<:::qualified_variable:> = {};"); + verifyFormat("int arr[::qualified_variable] = <%%>;"); + verifyFormat("%:include <header>"); + verifyFormat("%:define A x##y"); + verifyFormat("#define A x%:%:y"); +} + } // namespace } // namespace format } // namespace clang Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -3242,6 +3242,10 @@ LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; + // Turning on 
digraphs in standards before C++0x is error-prone, because e.g. + // the sequence "<::" will be inconditionally treated as "[:". + // Cf. Lexer::LexTokenInternal. + LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp();
Index: clang/unittests/Format/FormatTest.cpp =================================================================== --- clang/unittests/Format/FormatTest.cpp +++ clang/unittests/Format/FormatTest.cpp @@ -24219,6 +24219,16 @@ Style); } +TEST_F(FormatTest, UnderstandsDigraphs) { + verifyFormat("int arr<:5:> = {};"); + verifyFormat("int arr[5] = <%%>;"); + verifyFormat("int arr<:::qualified_variable:> = {};"); + verifyFormat("int arr[::qualified_variable] = <%%>;"); + verifyFormat("%:include <header>"); + verifyFormat("%:define A x##y"); + verifyFormat("#define A x%:%:y"); +} + } // namespace } // namespace format } // namespace clang Index: clang/lib/Format/Format.cpp =================================================================== --- clang/lib/Format/Format.cpp +++ clang/lib/Format/Format.cpp @@ -3242,6 +3242,10 @@ LangOpts.CPlusPlus17 = LexingStd >= FormatStyle::LS_Cpp17; LangOpts.CPlusPlus20 = LexingStd >= FormatStyle::LS_Cpp20; LangOpts.Char8 = LexingStd >= FormatStyle::LS_Cpp20; + // Turning on digraphs in standards before C++0x is error-prone, because e.g. + // the sequence "<::" will be inconditionally treated as "[:". + // Cf. Lexer::LexTokenInternal. + LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; LangOpts.LineComment = 1; bool AlternativeOperators = Style.isCpp();
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits