Author: Hirofumi Nakamura Date: 2024-01-31T00:30:37+09:00 New Revision: 00582636009d51c5781b9cae8fde858ab3758306
URL: https://github.com/llvm/llvm-project/commit/00582636009d51c5781b9cae8fde858ab3758306 DIFF: https://github.com/llvm/llvm-project/commit/00582636009d51c5781b9cae8fde858ab3758306.diff LOG: [clang-format] Support of TableGen tokens with unary operator like form, bang operators and numeric literals. (#78996) Adds the support for tokens that have forms like unary operators. - bang operators: `!name` - cond operator: `!cond` - numeric literals: `+1`, `-1` The cond operator is one of the bang operators but is distinguished because it has a very specific syntax. Added: Modified: clang/lib/Format/FormatToken.h clang/lib/Format/FormatTokenLexer.cpp clang/unittests/Format/TokenAnnotatorTest.cpp Removed: ################################################################################ diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index dede89f260015..bace91b5f99b4 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -148,6 +148,8 @@ namespace format { TYPE(StructLBrace) \ TYPE(StructRBrace) \ TYPE(StructuredBindingLSquare) \ + TYPE(TableGenBangOperator) \ + TYPE(TableGenCondOperator) \ TYPE(TableGenMultiLineString) \ TYPE(TemplateCloser) \ TYPE(TemplateOpener) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 52a55ea23b5f2..d7de09ef0e12a 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -276,13 +276,44 @@ void FormatTokenLexer::tryMergePreviousTokens() { return; } } - // TableGen's Multi line string starts with [{ - if (Style.isTableGen() && tryMergeTokens({tok::l_square, tok::l_brace}, - TT_TableGenMultiLineString)) { - // Set again with finalizing. This must never be annotated as other types. 
- Tokens.back()->setFinalizedType(TT_TableGenMultiLineString); - Tokens.back()->Tok.setKind(tok::string_literal); - return; + if (Style.isTableGen()) { + // TableGen's Multi line string starts with [{ + if (tryMergeTokens({tok::l_square, tok::l_brace}, + TT_TableGenMultiLineString)) { + // Set again with finalizing. This must never be annotated as other types. + Tokens.back()->setFinalizedType(TT_TableGenMultiLineString); + Tokens.back()->Tok.setKind(tok::string_literal); + return; + } + // TableGen's bang operator is the form !<name>. + // !cond is a special case with specific syntax. + if (tryMergeTokens({tok::exclaim, tok::identifier}, + TT_TableGenBangOperator)) { + Tokens.back()->Tok.setKind(tok::identifier); + Tokens.back()->Tok.setIdentifierInfo(nullptr); + if (Tokens.back()->TokenText == "!cond") + Tokens.back()->setFinalizedType(TT_TableGenCondOperator); + else + Tokens.back()->setFinalizedType(TT_TableGenBangOperator); + return; + } + if (tryMergeTokens({tok::exclaim, tok::kw_if}, TT_TableGenBangOperator)) { + // Here, "! if" becomes "!if". That is, ! captures if even when the space + // exists. That is only one possibility in TableGen's syntax. + Tokens.back()->Tok.setKind(tok::identifier); + Tokens.back()->Tok.setIdentifierInfo(nullptr); + Tokens.back()->setFinalizedType(TT_TableGenBangOperator); + return; + } + // +, - with numbers are literals. Not unary operators. 
+ if (tryMergeTokens({tok::plus, tok::numeric_constant}, TT_Unknown)) { + Tokens.back()->Tok.setKind(tok::numeric_constant); + return; + } + if (tryMergeTokens({tok::minus, tok::numeric_constant}, TT_Unknown)) { + Tokens.back()->Tok.setKind(tok::numeric_constant); + return; + } } } diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 67678c18963b1..f3e443e8829bd 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -2215,16 +2215,24 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { EXPECT_TRUE(Tokens[0]->IsMultiline); EXPECT_EQ(Tokens[0]->LastLineColumnWidth, sizeof(" the string. }]") - 1); + // Numeric literals. + Tokens = Annotate("1234"); + EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); + Tokens = Annotate("-1"); + EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); + Tokens = Annotate("+1234"); + EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); + Tokens = Annotate("0b0110"); + EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); + Tokens = Annotate("0x1abC"); + EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); + // Identifier tokens. In TableGen, identifiers can begin with a number. // In ambiguous cases, the lexer tries to lex it as a number. // Even if the try fails, it does not fall back to identifier lexing and // regard as an error. // The ambiguity is not documented. The result of those tests are based on the // implementation of llvm::TGLexer::LexToken. - Tokens = Annotate("1234"); - EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); - Tokens = Annotate("0x1abC"); - EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); // This is invalid syntax of number, but not an identifier. 
Tokens = Annotate("0x1234x"); EXPECT_TOKEN(Tokens[0], tok::numeric_constant, TT_Unknown); @@ -2249,6 +2257,14 @@ TEST_F(TokenAnnotatorTest, UnderstandTableGenTokens) { EXPECT_TOKEN(Tokens[6], tok::l_brace, TT_ElseLBrace); Tokens = Annotate("defset Foo Def2 = {}"); EXPECT_TOKEN(Tokens[4], tok::l_brace, TT_FunctionLBrace); + + // Bang Operators. + Tokens = Annotate("!foreach"); + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator); + Tokens = Annotate("!if"); + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenBangOperator); + Tokens = Annotate("!cond"); + EXPECT_TOKEN(Tokens[0], tok::identifier, TT_TableGenCondOperator); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits