sstwcw updated this revision to Diff 540752. sstwcw added a comment. - Add back the const qualifier
Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D154093/new/ https://reviews.llvm.org/D154093 Files: clang/docs/ClangFormatStyleOptions.rst clang/include/clang/Format/Format.h clang/lib/Format/BreakableToken.cpp clang/lib/Format/BreakableToken.h clang/lib/Format/ContinuationIndenter.cpp clang/lib/Format/FormatToken.h clang/lib/Format/TokenAnnotator.cpp clang/lib/Format/WhitespaceManager.cpp clang/unittests/Format/FormatTestVerilog.cpp clang/unittests/Format/TokenAnnotatorTest.cpp
Index: clang/unittests/Format/TokenAnnotatorTest.cpp =================================================================== --- clang/unittests/Format/TokenAnnotatorTest.cpp +++ clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1802,6 +1802,30 @@ EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown); EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown); EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown); + + // String literals in concatenation. + Tokens = Annotate("x = {\"\"};"); + ASSERT_EQ(Tokens.size(), 7u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation); + Tokens = Annotate("x = {\"\", \"\"};"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation); + EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation); + Tokens = Annotate("x = '{{\"\"}};"); + ASSERT_EQ(Tokens.size(), 10u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation); + // Cases where the string should not be annotated that type. Fix the + // `TT_Unknown` if needed in the future. + Tokens = Annotate("x = {\"\" == \"\"};"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown); + EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown); + Tokens = Annotate("x = {(\"\")};"); + ASSERT_EQ(Tokens.size(), 9u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown); + Tokens = Annotate("x = '{\"\"};"); + ASSERT_EQ(Tokens.size(), 8u) << Tokens; + EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown); } TEST_F(TokenAnnotatorTest, UnderstandConstructors) { Index: clang/unittests/Format/FormatTestVerilog.cpp =================================================================== --- clang/unittests/Format/FormatTestVerilog.cpp +++ clang/unittests/Format/FormatTestVerilog.cpp @@ -1155,6 +1155,66 @@ verifyFormat("{<<byte{j}} = x;"); } +TEST_F(FormatTestVerilog, StringLiteral) { + // Long strings should be broken. + verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ", + "xxxx"});)", + R"(x({"xxxxxxxxxxxxxxxx xxxx"});)", + getStyleWithColumns(getDefaultStyle(), 23)); + verifyFormat(R"(x({"xxxxxxxxxxxxxxxx", + " xxxx"});)", + R"(x({"xxxxxxxxxxxxxxxx xxxx"});)", + getStyleWithColumns(getDefaultStyle(), 22)); + // Braces should be added when they don't already exist. + verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ", + "xxxx"});)", + R"(x("xxxxxxxxxxxxxxxx xxxx");)", + getStyleWithColumns(getDefaultStyle(), 23)); + verifyFormat(R"(x({"xxxxxxxxxxxxxxxx", + " xxxx"});)", + R"(x("xxxxxxxxxxxxxxxx xxxx");)", + getStyleWithColumns(getDefaultStyle(), 22)); + verifyFormat(R"(x({{"xxxxxxxxxxxxxxxx ", + "xxxx"} == x});)", + R"(x({"xxxxxxxxxxxxxxxx xxxx" == x});)", + getStyleWithColumns(getDefaultStyle(), 24)); + verifyFormat(R"(string x = {"xxxxxxxxxxxxxxxx ", + "xxxxxxxx"};)", + R"(string x = "xxxxxxxxxxxxxxxx xxxxxxxx";)", + getStyleWithColumns(getDefaultStyle(), 32)); + // Space around braces should be correct. + auto Style = getStyleWithColumns(getDefaultStyle(), 24); + Style.Cpp11BracedListStyle = false; + verifyFormat(R"(x({ "xxxxxxxxxxxxxxxx ", + "xxxx" });)", + R"(x("xxxxxxxxxxxxxxxx xxxx");)", Style); + // Braces should not be added twice. + verifyFormat(R"(x({"xxxxxxxx", + "xxxxxxxx", + "xxxxxx"});)", + R"(x("xxxxxxxxxxxxxxxxxxxxxx");)", + getStyleWithColumns(getDefaultStyle(), 14)); + verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ", + "xxxxxxxxxxxxxxxx ", + "xxxx"});)", + R"(x("xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx xxxx");)", + getStyleWithColumns(getDefaultStyle(), 23)); + verifyFormat(R"(x({"xxxxxxxxxxxxxxxx ", + "xxxxxxxxxxxxxxxx ", + "xxxx"});)", + R"(x({"xxxxxxxxxxxxxxxx xxxxxxxxxxxxxxxx ", "xxxx"});)", + getStyleWithColumns(getDefaultStyle(), 23)); + // These kinds of strings don't exist in Verilog. + verifyNoCrash(R"(x(@"xxxxxxxxxxxxxxxx xxxx");)", + getStyleWithColumns(getDefaultStyle(), 23)); + verifyNoCrash(R"(x(u"xxxxxxxxxxxxxxxx xxxx");)", + getStyleWithColumns(getDefaultStyle(), 23)); + verifyNoCrash(R"(x(u8"xxxxxxxxxxxxxxxx xxxx");)", + getStyleWithColumns(getDefaultStyle(), 23)); + verifyNoCrash(R"(x(_T("xxxxxxxxxxxxxxxx xxxx"));)", + getStyleWithColumns(getDefaultStyle(), 23)); +} + TEST_F(FormatTestVerilog, StructLiteral) { verifyFormat("c = '{0, 0.0};"); verifyFormat("c = '{'{1, 1.0}, '{2, 2.0}};"); Index: clang/lib/Format/WhitespaceManager.cpp =================================================================== --- clang/lib/Format/WhitespaceManager.cpp +++ clang/lib/Format/WhitespaceManager.cpp @@ -22,8 +22,13 @@ bool WhitespaceManager::Change::IsBeforeInFile::operator()( const Change &C1, const Change &C2) const { return SourceMgr.isBeforeInTranslationUnit( - C1.OriginalWhitespaceRange.getBegin(), - C2.OriginalWhitespaceRange.getBegin()); + C1.OriginalWhitespaceRange.getBegin(), + C2.OriginalWhitespaceRange.getBegin()) || + (C1.OriginalWhitespaceRange.getBegin() == + C2.OriginalWhitespaceRange.getBegin() && + SourceMgr.isBeforeInTranslationUnit( + C1.OriginalWhitespaceRange.getEnd(), + C2.OriginalWhitespaceRange.getEnd())); } WhitespaceManager::Change::Change(const FormatToken &Tok, @@ -1415,10 +1420,31 @@ void WhitespaceManager::generateChanges() { for (unsigned i = 0, e = Changes.size(); i != e; ++i) { const Change &C = Changes[i]; - if (i > 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() == - C.OriginalWhitespaceRange.getBegin()) { - // Do not generate two replacements for the same location. - continue; + if (i > 0) { + auto Last = Changes[i - 1].OriginalWhitespaceRange; + auto New = Changes[i].OriginalWhitespaceRange; + // Do not generate two replacements for the same location. As a special + // case, it is allowed if there is a replacement for the empty range + // between 2 tokens and another non-empty range at the start of the second + // token. + // + // An original whitespace range that is empty is encountered when 2 tokens + // have no whitespace in between in the input. It does not matter whether + // whitespace is to be added. For example, if the input is `foo();`, + // there will be a replacement for the range between every consecutive + // pair of tokens. + // + // A replacement at the start of a token can be added by + // `BreakableVerilogStringLiteral::insertBreak` when it adds braces around + // the string literal. Say the line `x("long string");` is to become the + // 2 lines `x({"long ",` and ` "string"});`. There will be a + // replacement for the empty range between the parenthesis and the string + // and another replacement for the quote character. + if (Last.getBegin() == New.getBegin() && + (Last.getEnd() != Last.getBegin() || + New.getEnd() == New.getBegin())) { + continue; + } } if (C.CreateReplacement) { std::string ReplacementText = C.PreviousLinePostfix; Index: clang/lib/Format/TokenAnnotator.cpp =================================================================== --- clang/lib/Format/TokenAnnotator.cpp +++ clang/lib/Format/TokenAnnotator.cpp @@ -862,6 +862,11 @@ OpeningBrace.Previous->is(TT_JsTypeColon)) { Contexts.back().IsExpression = false; } + if (Style.isVerilog() && + (!OpeningBrace.getPreviousNonComment() || + OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) { + Contexts.back().VerilogMayBeConcatenation = true; + } unsigned CommaCount = 0; while (CurrentToken) { @@ -1736,6 +1741,9 @@ bool InCpp11AttributeSpecifier = false; bool InCSharpAttributeSpecifier = false; bool VerilogAssignmentFound = false; + // Whether the braces may mean concatenation instead of structure or array + // literal. + bool VerilogMayBeConcatenation = false; enum { Unknown, // Like the part after `:` in a constructor. @@ -2068,6 +2076,14 @@ } else { Current.setType(TT_LineComment); } + } else if (Current.is(tok::string_literal)) { + if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation && + Current.getPreviousNonComment() && + Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) && + Current.getNextNonComment() && + Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) { + Current.setType(TT_VerilogStringInConcatenation); + } } else if (Current.is(tok::l_paren)) { if (lParenStartsCppCast(Current)) Current.setType(TT_CppCastLParen); Index: clang/lib/Format/FormatToken.h =================================================================== --- clang/lib/Format/FormatToken.h +++ clang/lib/Format/FormatToken.h @@ -163,6 +163,8 @@ TYPE(VerilogNumberBase) \ /* like `(strong1, pull0)` */ \ TYPE(VerilogStrength) \ + /* A string in a concatenation like `{"some ", "text"}`. */ \ + TYPE(VerilogStringInConcatenation) \ /* Things inside the table in user-defined primitives. */ \ TYPE(VerilogTableItem) \ /* those that separate ports of different types */ \ Index: clang/lib/Format/ContinuationIndenter.cpp =================================================================== --- clang/lib/Format/ContinuationIndenter.cpp +++ clang/lib/Format/ContinuationIndenter.cpp @@ -2175,6 +2175,22 @@ return nullptr; StringRef Text = Current.TokenText; + // We need this to address the case where there is an unbreakable tail only + // if certain other formatting decisions have been taken. The + // UnbreakableTailLength of Current is an overapproximation is that case and + // we need to be correct here. + unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) + ? 0 + : Current.UnbreakableTailLength; + + if (Style.isVerilog()) { + if (Text.size() < 2u || !Text.startswith("\"") || !Text.endswith("\"")) + return nullptr; + return std::make_unique<BreakableVerilogStringLiteral>( + Current, StartColumn, UnbreakableTailLength, + State.Line->InPPDirective, Encoding, Style); + } + StringRef Prefix; StringRef Postfix; // FIXME: Handle whitespace between '_T', '(', '"..."', and ')'. @@ -2187,13 +2203,6 @@ Text.startswith(Prefix = "u8\"") || Text.startswith(Prefix = "L\""))) || (Text.startswith(Prefix = "_T(\"") && Text.endswith(Postfix = "\")"))) { - // We need this to address the case where there is an unbreakable tail - // only if certain other formatting decisions have been taken. The - // UnbreakableTailLength of Current is an overapproximation is that case - // and we need to be correct here. - unsigned UnbreakableTailLength = (State.NextToken && canBreak(State)) - ? 0 - : Current.UnbreakableTailLength; return std::make_unique<BreakableStringLiteral>( Current, StartColumn, Prefix, Postfix, UnbreakableTailLength, State.Line->InPPDirective, Encoding, Style); Index: clang/lib/Format/BreakableToken.h =================================================================== --- clang/lib/Format/BreakableToken.h +++ clang/lib/Format/BreakableToken.h @@ -274,7 +274,9 @@ unsigned StartColumn; // The prefix a line needs after a break in the token. StringRef Prefix; - // The postfix a line needs before introducing a break. + // The closing quote of the string. Except in the + // BreakableVerilogStringliteral subclass, it is also the postfix to be added + // to the first part when the string is broken. StringRef Postfix; // The token text excluding the prefix and postfix. StringRef Line; @@ -283,6 +285,34 @@ unsigned UnbreakableTailLength; }; +class BreakableVerilogStringLiteral : public BreakableStringLiteral { +public: + /// Creates a breakable token for a single line Verilog string literal. + /// + /// \p StartColumn specifies the column in which the token will start + /// after formatting. + BreakableVerilogStringLiteral(const FormatToken &Tok, unsigned StartColumn, + unsigned UnbreakableTailLength, + bool InPPDirective, encoding::Encoding Encoding, + const FormatStyle &Style); + Split getSplit(unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ContentStartColumn, + const llvm::Regex &CommentPragmasRegex) const override; + void insertBreak(unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, + WhitespaceManager &Whitespaces) const override; + +protected: + // Whether braces should be inserted around the string to form a + // concatenation. + mutable bool BracesNeeded; + // The braces along with the quotes they replace. Depending on the style. + const StringRef LeftBraceQuote; + const StringRef RightBraceQuote; + // Width added to the left. + const unsigned AdditionalStartColumnWidth; +}; + class BreakableComment : public BreakableToken { protected: /// Creates a breakable token for a comment. Index: clang/lib/Format/BreakableToken.cpp =================================================================== --- clang/lib/Format/BreakableToken.cpp +++ clang/lib/Format/BreakableToken.cpp @@ -292,6 +292,55 @@ Prefix, InPPDirective, 1, StartColumn); } +BreakableVerilogStringLiteral::BreakableVerilogStringLiteral( + const FormatToken &Tok, unsigned StartColumn, + unsigned UnbreakableTailLength, bool InPPDirective, + encoding::Encoding Encoding, const FormatStyle &Style) + : BreakableStringLiteral(Tok, StartColumn, /*Prefix=*/"\"", + /*Postfix=*/"\"", UnbreakableTailLength, + InPPDirective, Encoding, Style), + BracesNeeded(Tok.isNot(TT_VerilogStringInConcatenation)), + LeftBraceQuote(Style.Cpp11BracedListStyle ? "{\"" : "{ \""), + RightBraceQuote(Style.Cpp11BracedListStyle ? "\"}" : "\" }"), + AdditionalStartColumnWidth(BracesNeeded ? LeftBraceQuote.size() - 1u + : 0u) {} + +BreakableToken::Split BreakableVerilogStringLiteral::getSplit( + unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit, + unsigned ContentStartColumn, const llvm::Regex &CommentPragmasRegex) const { + // 2 is subtracted from the column limit for the closing quote and comma. + return getStringSplit( + Line.substr(TailOffset), ContentStartColumn + AdditionalStartColumnWidth, + std::max(3u, ColumnLimit) - 2, Style.TabWidth, Encoding); +} + +void BreakableVerilogStringLiteral::insertBreak( + unsigned LineIndex, unsigned TailOffset, Split Split, + unsigned ContentIndent, WhitespaceManager &Whitespaces) const { + if (BracesNeeded) { + // For Verilog, concatenations need to be wrapped in braces. To add a + // brace, we replace the quote with a brace and another quote. This is + // because the rest of the program requires one replacement for each source + // range. If we replace the empty strings around the string, it may + // conflict with whitespace replacements between the string and adjacent + // tokens. + BracesNeeded = false; + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/0, /*ReplaceChars=*/1, /*PreviousPostfix=*/"", + /*CurrentPrefix=*/LeftBraceQuote, InPPDirective, /*NewLines=*/0, + /*Spaces=*/0); + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/Tok.TokenText.size() - 1, /*ReplaceChars=*/1, + /*PreviousPostfix=*/RightBraceQuote, + /*CurrentPrefix=*/"", InPPDirective, /*NewLines=*/0, /*Spaces=*/0); + } + Whitespaces.replaceWhitespaceInToken( + Tok, /*Offset=*/1u + TailOffset + Split.first, + /*ReplaceChars=*/Split.second, /*PreviousPostfix=*/"\",", + /*CurrentPrefix=*/Prefix, InPPDirective, /*NewLines=*/1, + /*Spaces=*/StartColumn + AdditionalStartColumnWidth); +} + BreakableComment::BreakableComment(const FormatToken &Token, unsigned StartColumn, bool InPPDirective, encoding::Encoding Encoding, Index: clang/include/clang/Format/Format.h =================================================================== --- clang/include/clang/Format/Format.h +++ clang/include/clang/Format/Format.h @@ -1910,6 +1910,8 @@ bool BreakAfterJavaFieldAnnotations; /// Allow breaking string literals when formatting. + /// + /// In C, C++, and Objective-C: /// \code /// true: /// const char* x = "veryVeryVeryVeryVeryVe" @@ -1918,8 +1920,21 @@ /// /// false: /// const char* x = - /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + /// \endcode + /// + /// In Verilog: + /// \code + /// true: + /// string x = {"veryVeryVeryVeryVeryVe", + /// "ryVeryVeryVeryVeryVery", + /// "VeryLongString"}; + /// + /// false: + /// string x = + /// "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; /// \endcode + /// /// \version 3.9 bool BreakStringLiterals; Index: clang/docs/ClangFormatStyleOptions.rst =================================================================== --- clang/docs/ClangFormatStyleOptions.rst +++ clang/docs/ClangFormatStyleOptions.rst @@ -2623,6 +2623,8 @@ **BreakStringLiterals** (``Boolean``) :versionbadge:`clang-format 3.9` :ref:`¶ <BreakStringLiterals>` Allow breaking string literals when formatting. + In C, C++, and Objective-C: + .. code-block:: c++ true: @@ -2632,7 +2634,20 @@ false: const char* x = - "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; + + In Verilog: + + .. code-block:: c++ + + true: + string x = {"veryVeryVeryVeryVeryVe", + "ryVeryVeryVeryVeryVery", + "VeryLongString"}; + + false: + string x = + "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString"; .. _ColumnLimit:
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits