[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-08-06 Thread sstwcw via Phabricator via cfe-commits
sstwcw updated this revision to Diff 547619.
sstwcw added a comment.

Add support for breaking strings with + in C# and others


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestCSharp.cpp
  clang/unittests/Format/FormatTestJS.cpp
  clang/unittests/Format/FormatTestJava.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1815,6 +1815,30 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_StringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_StringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_StringInConcatenation);
+  Tokens = Annotate("x = '{{\"\"}};");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_StringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = {(\"\")};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1157,6 +1157,66 @@
   verifyFormat("{< Ranges(1, tooling::Range(Offset, Length));
-tooling::Replacements Replaces = reformat(Style, Code, Ranges);
-auto Result = applyAllReplacements(Code, Replaces);
-EXPECT_TRUE(static_cast(Result));
-LLVM_DEBUG(llvm::errs() << "\n" << *Result << "\n\n");
-return *Result;
-  }
-
-  static std::string
-  format(llvm::StringRef Code,
- const FormatStyle  = getGoogleStyle(FormatStyle::LK_Java)) {
-return format(Code, 0, Code.size(), Style);
+  FormatStyle getDefaultStyle() const override {
+return getGoogleStyle(FormatStyle::LK_Java);
   }
 
   static FormatStyle getStyleWithColumns(unsigned ColumnLimit) {
@@ -41,13 +26,6 @@
 Style.ColumnLimit = ColumnLimit;
 return Style;
   }
-
-  static void verifyFormat(
-  llvm::StringRef Code,
-  const FormatStyle  = getGoogleStyle(FormatStyle::LK_Java)) {
-EXPECT_EQ(Code.str(), format(Code, Style)) << "Expected code is not stable";
-EXPECT_EQ(Code.str(), format(test::messUp(Code), Style));
-  }
 };
 
 TEST_F(FormatTestJava, NoAlternativeOperatorNames) {
@@ -565,9 +543,9 @@
 }
 
 TEST_F(FormatTestJava, BreaksStringLiterals) {
-  // FIXME: String literal breaking is currently disabled for Java and JS, as it
-  // requires strings to be merged using "+" which we don't support.
-  verifyFormat("\"some text other\";", getStyleWithColumns(14));
+  verifyFormat("x = \"some text \"\n"
+   "+ \"other\";",
+   "x = \"some text other\";", getStyleWithColumns(18));
 }
 
 TEST_F(FormatTestJava, AlignsBlockComments) {
@@ -625,5 +603,7 @@
Style);
 }
 
+} // namespace
+} // namespace test
 } // namespace format
-} // end namespace clang
+} // namespace clang
Index: clang/unittests/Format/FormatTestJS.cpp
===
--- clang/unittests/Format/FormatTestJS.cpp
+++ clang/unittests/Format/FormatTestJS.cpp
@@ -1505,6 +1505,74 @@
 TEST_F(FormatTestJS, StringLiteralConcatenation) {
   verifyFormat("var literal = 'hello ' +\n"
"'world';");
+
+  // Long strings should be broken.
+  verifyFormat("var literal =\n"
+   "' ' +\n"
+   "'';",
+   "var literal = ' 

[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-29 Thread Owen Pan via Phabricator via cfe-commits
owenpan added a comment.

In D154093#4542339 , @sstwcw wrote:

> @owenpan What do you think about this revision especially the replacement 
> part?

See D154093#4544495 . Then we can 
extend it by using `,` instead of `+` for Verilog (plus inserting a pair of 
braces).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-29 Thread Owen Pan via Phabricator via cfe-commits
owenpan added inline comments.



Comment at: clang/lib/Format/ContinuationIndenter.cpp:2152-2154
 // FIXME: String literal breaking is currently disabled for C#, Java, Json
 // and JavaScript, as it requires strings to be merged using "+" which we
 // don't support.

Can we support using `+` first?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-28 Thread sstwcw via Phabricator via cfe-commits
sstwcw added a comment.

@owenpan What do you think about this revision especially the replacement part?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-21 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added a comment.

For the replacement part, I refer to @owenpan and/or @MyDeveloperDay hoping 
they have more insight.
The rest look good to me.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-21 Thread sstwcw via Phabricator via cfe-commits
sstwcw updated this revision to Diff 542916.
sstwcw added a comment.

- Add comment explaining the replacements


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1802,6 +1802,30 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = '{{\"\"}};");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = {(\"\")};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1155,6 +1155,66 @@
   verifyFormat("{< 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
-  // Do not generate two replacements for the same location.
-  continue;
+if (i > 0) {
+  auto Last = Changes[i - 1].OriginalWhitespaceRange;
+  auto New = Changes[i].OriginalWhitespaceRange;
+  // Do not generate two replacements for the same location.  As a special
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the second
+  // token.  We didn't implement logic to combine replacements for 2
+  // consecutive source ranges into a single replacement, because the
+  // program works fine without it.
+  //
+  // We can't eliminate empty original whitespace ranges.  They appear when
+  // 2 tokens have no whitespace in between in the input.  It does not
+  // matter whether whitespace is to be added.  If no whitespace is to be
+  // added, the replacement will be empty, and it gets eliminated after this
+  // step in storeReplacement.  For example, if the input is `foo();`,
+  // there will be a replacement for the range between every consecutive
+  // pair of tokens.
+  //
+  // A replacement at the start of a token can be added by
+  // BreakableVerilogStringLiteral::insertBreak when it adds braces around
+  // the string literal.  Say Verilog code is being formatted and the first
+  // line is to become the next 2 lines.
+  // x("long string");
+  // x({"long ",
+  //"string"});
+  // There will be a replacement for the empty range between the parenthesis
+  // and the string and another replacement for the quote character.  The
+  // replacement for the empty range between the parenthesis and the quote
+  // comes from ContinuationIndenter::addTokenOnCurrentLine when it changes
+  // the original empty range between the parenthesis and the string to
+  // another empty one.  The replacement for the quote character comes from
+  // BreakableVerilogStringLiteral::insertBreak when it adds the brace.  In
+  // the example, the replacement for the empty range is the same as the
+  // original text.  However, eliminating replacements that are same as the
+  // original 

[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-20 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added a comment.

Apart from that I'm okay.




Comment at: clang/lib/Format/WhitespaceManager.cpp:1428
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the 
second
+  // token.

HazardyKnusperkeks wrote:
> Can you elaborate where the 2 replacements come from, and why we can't make 
> sure there is no empty range?
This is open (at least for me).


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-20 Thread sstwcw via Phabricator via cfe-commits
sstwcw marked an inline comment as done.
sstwcw added a comment.

Are there any more problems?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-15 Thread sstwcw via Phabricator via cfe-commits
sstwcw marked an inline comment as done.
sstwcw added inline comments.



Comment at: clang/lib/Format/ContinuationIndenter.cpp:2247
bool DryRun, bool Strict) {
-  std::unique_ptr Token =
+  std::unique_ptr Token =
   createBreakableToken(Current, State, AllowBreak);

HazardyKnusperkeks wrote:
> This const can be readded, or not?
I forgot to add it.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-15 Thread sstwcw via Phabricator via cfe-commits
sstwcw updated this revision to Diff 540752.
sstwcw added a comment.

- Add back the const qualifier


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1802,6 +1802,30 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = '{{\"\"}};");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = {(\"\")};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1155,6 +1155,66 @@
   verifyFormat("{< 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
-  // Do not generate two replacements for the same location.
-  continue;
+if (i > 0) {
+  auto Last = Changes[i - 1].OriginalWhitespaceRange;
+  auto New = Changes[i].OriginalWhitespaceRange;
+  // Do not generate two replacements for the same location.  As a special
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the second
+  // token.
+  //
+  // An original whitespace range that is empty is encountered when 2 tokens
+  // have no whitespace in between in the input.  It does not matter whether
+  // whitespace is to be added.  For example, if the input is `foo();`,
+  // there will be a replacement for the range between every consecutive
+  // pair of tokens.
+  //
+  //  A replacement at the start of a token can be added by
+  // `BreakableVerilogStringLiteral::insertBreak` when it adds braces around
+  // the string literal.  Say the line `x("long string");` is to become the
+  // 2 lines `x({"long ",` and `   "string"});`.  There will be a
+  // replacement for the empty range between the parenthesis and the string
+  // and another replacement for the quote character.
+  if (Last.getBegin() == New.getBegin() &&
+  (Last.getEnd() != Last.getBegin() ||
+   New.getEnd() == New.getBegin())) {
+continue;
+  }
 }
 if (C.CreateReplacement) {
   std::string ReplacementText = C.PreviousLinePostfix;
Index: clang/lib/Format/TokenAnnotator.cpp
===
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -862,6 +862,11 @@
 OpeningBrace.Previous->is(TT_JsTypeColon)) {
   Contexts.back().IsExpression = false;
 }
+if (Style.isVerilog() &&
+(!OpeningBrace.getPreviousNonComment() ||
+ OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
+  Contexts.back().VerilogMayBeConcatenation = true;
+}
 
 unsigned CommaCount = 0;
 while (CurrentToken) {
@@ -1736,6 +1741,9 @@
 bool InCpp11AttributeSpecifier = false;
 bool InCSharpAttributeSpecifier = false;
 

[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-15 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added inline comments.



Comment at: clang/lib/Format/ContinuationIndenter.cpp:2247
bool DryRun, bool Strict) {
-  std::unique_ptr Token =
+  std::unique_ptr Token =
   createBreakableToken(Current, State, AllowBreak);

This const can be readded, or not?



Comment at: clang/lib/Format/WhitespaceManager.cpp:1428
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the 
second
+  // token.

Can you elaborate where the 2 replacements come from, and why we can't make 
sure there is no empty range?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-14 Thread sstwcw via Phabricator via cfe-commits
sstwcw updated this revision to Diff 540611.
sstwcw edited the summary of this revision.
sstwcw added a comment.

- Add back the const qualifiers for methods


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1792,6 +1792,30 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = '{{\"\"}};");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = {(\"\")};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1155,6 +1155,66 @@
   verifyFormat("{< 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
-  // Do not generate two replacements for the same location.
-  continue;
+if (i > 0) {
+  auto Last = Changes[i - 1].OriginalWhitespaceRange;
+  auto New = Changes[i].OriginalWhitespaceRange;
+  // Do not generate two replacements for the same location.  As a special
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the second
+  // token.
+  if (Last.getBegin() == New.getBegin() &&
+  (Last.getEnd() != Last.getBegin() ||
+   New.getEnd() == New.getBegin())) {
+continue;
+  }
 }
 if (C.CreateReplacement) {
   std::string ReplacementText = C.PreviousLinePostfix;
Index: clang/lib/Format/TokenAnnotator.cpp
===
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -862,6 +862,11 @@
 OpeningBrace.Previous->is(TT_JsTypeColon)) {
   Contexts.back().IsExpression = false;
 }
+if (Style.isVerilog() &&
+(!OpeningBrace.getPreviousNonComment() ||
+ OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
+  Contexts.back().VerilogMayBeConcatenation = true;
+}
 
 unsigned CommaCount = 0;
 while (CurrentToken) {
@@ -1736,6 +1741,9 @@
 bool InCpp11AttributeSpecifier = false;
 bool InCSharpAttributeSpecifier = false;
 bool VerilogAssignmentFound = false;
+// Whether the braces may mean concatenation instead of structure or array
+// literal.
+bool VerilogMayBeConcatenation = false;
 enum {
   Unknown,
   // Like the part after `:` in a constructor.
@@ -2068,6 +2076,14 @@
   } else {
 Current.setType(TT_LineComment);
   }
+} else if (Current.is(tok::string_literal)) {
+  if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
+  Current.getPreviousNonComment() &&
+  Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
+  Current.getNextNonComment() &&
+  Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
+

[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-06 Thread sstwcw via Phabricator via cfe-commits
sstwcw updated this revision to Diff 537975.
sstwcw marked an inline comment as done.
sstwcw added a comment.

- Drop the line in the doc


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1792,6 +1792,30 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = '{{\"\"}};");
+  ASSERT_EQ(Tokens.size(), 10u) << Tokens;
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = {(\"\")};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1155,6 +1155,66 @@
   verifyFormat("{< 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
-  // Do not generate two replacements for the same location.
-  continue;
+if (i > 0) {
+  auto Last = Changes[i - 1].OriginalWhitespaceRange;
+  auto New = Changes[i].OriginalWhitespaceRange;
+  // Do not generate two replacements for the same location.  As a special
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the second
+  // token.
+  if (Last.getBegin() == New.getBegin() &&
+  (Last.getEnd() != Last.getBegin() ||
+   New.getEnd() == New.getBegin())) {
+continue;
+  }
 }
 if (C.CreateReplacement) {
   std::string ReplacementText = C.PreviousLinePostfix;
Index: clang/lib/Format/TokenAnnotator.cpp
===
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -862,6 +862,11 @@
 OpeningBrace.Previous->is(TT_JsTypeColon)) {
   Contexts.back().IsExpression = false;
 }
+if (Style.isVerilog() &&
+(!OpeningBrace.getPreviousNonComment() ||
+ OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
+  Contexts.back().VerilogMayBeConcatenation = true;
+}
 
 unsigned CommaCount = 0;
 while (CurrentToken) {
@@ -1736,6 +1741,9 @@
 bool InCpp11AttributeSpecifier = false;
 bool InCSharpAttributeSpecifier = false;
 bool VerilogAssignmentFound = false;
+// Whether the braces may mean concatenation instead of structure or array
+// literal.
+bool VerilogMayBeConcatenation = false;
 enum {
   Unknown,
   // Like the part after `:` in a constructor.
@@ -2068,6 +2076,14 @@
   } else {
 Current.setType(TT_LineComment);
   }
+} else if (Current.is(tok::string_literal)) {
+  if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
+  Current.getPreviousNonComment() &&
+  Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
+  Current.getNextNonComment() &&
+  Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
+

[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-06 Thread sstwcw via Phabricator via cfe-commits
sstwcw added a comment.

If it is expected that the method does not modify the field, and the 
expectation is not fulfilled, I prefer to make it clear rather than to pretend 
that the method works as expected.  Can you help me come up with an 
alternative?  Braces need to be added around the string literal when it gets 
broken into parts.  Only one pair of braces need to be added no matter how many 
times the string gets broken.  My solution was to use a mutable field to mark 
whether braces have already been added.  Do you have another way?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-06 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added a comment.

In D154093#4466597 , @sstwcw wrote:

> In D154093#4466246 , 
> @HazardyKnusperkeks wrote:
>
>> I'd really prefer you put what you need to modify `mutable` instead of 
>> removing the `const` from everything else. But that's just my opinion.
>
> Why should the `insertBreak` method be const?

Because it shouldn't and up until now doesn't modify the `BreakableToken` the 
modifying changes happen in the `WhitespaceManager`.




Comment at: clang/include/clang/Format/Format.h:1929
+  ///
+  /// For C#, Java, and JavaScript, the developers never got to implementing 
the
+  /// feature.

I'd drop that, or replace it with a nicer sentence which only speaks of "all 
other languages", because right now you are missing at least protobuf and json.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-01 Thread sstwcw via Phabricator via cfe-commits
sstwcw added inline comments.



Comment at: clang/include/clang/Format/Format.h:1914
   ///const char* x =
-  ///  "veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
+  ///"veryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryVeryLongString";
   /// \endcode

It looks like they made a mistake in this example.  The second line gets 
indented by a continuation indentation both before and after this patch.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-01 Thread sstwcw via Phabricator via cfe-commits
sstwcw marked 2 inline comments as done.
sstwcw added a comment.

In D154093#4466246 , 
@HazardyKnusperkeks wrote:

> I'd really prefer you put what you need to modify `mutable` instead of 
> removing the `const` from everything else. But that's just my opinion.

Why should the `insertBreak` method be const?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-01 Thread sstwcw via Phabricator via cfe-commits
sstwcw updated this revision to Diff 536555.
sstwcw added a comment.

- Address comments


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/BreakableToken.cpp
  clang/lib/Format/BreakableToken.h
  clang/lib/Format/ContinuationIndenter.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/TokenAnnotator.cpp
  clang/lib/Format/WhitespaceManager.cpp
  clang/unittests/Format/FormatTestVerilog.cpp
  clang/unittests/Format/TokenAnnotatorTest.cpp

Index: clang/unittests/Format/TokenAnnotatorTest.cpp
===
--- clang/unittests/Format/TokenAnnotatorTest.cpp
+++ clang/unittests/Format/TokenAnnotatorTest.cpp
@@ -1770,6 +1770,24 @@
   EXPECT_TOKEN(Tokens[2], tok::l_paren, TT_Unknown);
   EXPECT_TOKEN(Tokens[5], tok::comma, TT_Unknown);
   EXPECT_TOKEN(Tokens[8], tok::r_paren, TT_Unknown);
+
+  // String literals in concatenation.
+  Tokens = Annotate("x = {\"\"};");
+  ASSERT_EQ(Tokens.size(), 7u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  Tokens = Annotate("x = {\"\", \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_VerilogStringInConcatenation);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_VerilogStringInConcatenation);
+  // Cases where the string should not be annotated that type.  Fix the
+  // `TT_Unknown` if needed in the future.
+  Tokens = Annotate("x = {\"\" == \"\"};");
+  ASSERT_EQ(Tokens.size(), 9u) << Tokens;
+  EXPECT_TOKEN(Tokens[3], tok::string_literal, TT_Unknown);
+  EXPECT_TOKEN(Tokens[5], tok::string_literal, TT_Unknown);
+  Tokens = Annotate("x = '{\"\"};");
+  ASSERT_EQ(Tokens.size(), 8u) << Tokens;
+  EXPECT_TOKEN(Tokens[4], tok::string_literal, TT_Unknown);
 }
 
 TEST_F(TokenAnnotatorTest, UnderstandConstructors) {
Index: clang/unittests/Format/FormatTestVerilog.cpp
===
--- clang/unittests/Format/FormatTestVerilog.cpp
+++ clang/unittests/Format/FormatTestVerilog.cpp
@@ -1155,6 +1155,66 @@
   verifyFormat("{< 0 && Changes[i - 1].OriginalWhitespaceRange.getBegin() ==
- C.OriginalWhitespaceRange.getBegin()) {
-  // Do not generate two replacements for the same location.
-  continue;
+if (i > 0) {
+  auto Last = Changes[i - 1].OriginalWhitespaceRange;
+  auto New = Changes[i].OriginalWhitespaceRange;
+  // Do not generate two replacements for the same location.  As a special
+  // case, it is allowed if there is a replacement for the empty range
+  // between 2 tokens and another non-empty range at the start of the second
+  // token.
+  if (Last.getBegin() == New.getBegin() &&
+  (Last.getEnd() != Last.getBegin() ||
+   New.getEnd() == New.getBegin())) {
+continue;
+  }
 }
 if (C.CreateReplacement) {
   std::string ReplacementText = C.PreviousLinePostfix;
Index: clang/lib/Format/TokenAnnotator.cpp
===
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -867,6 +867,11 @@
 OpeningBrace.Previous->is(TT_JsTypeColon)) {
   Contexts.back().IsExpression = false;
 }
+if (Style.isVerilog() &&
+(!OpeningBrace.getPreviousNonComment() ||
+ OpeningBrace.getPreviousNonComment()->isNot(Keywords.kw_apostrophe))) {
+  Contexts.back().VerilogMayBeConcatenation = true;
+}
 
 unsigned CommaCount = 0;
 while (CurrentToken) {
@@ -1741,6 +1746,9 @@
 bool InCpp11AttributeSpecifier = false;
 bool InCSharpAttributeSpecifier = false;
 bool VerilogAssignmentFound = false;
+// Whether the braces may mean concatenation instead of structure or array
+// literal.
+bool VerilogMayBeConcatenation = false;
 enum {
   Unknown,
   // Like the part after `:` in a constructor.
@@ -2073,6 +2081,14 @@
   } else {
 Current.setType(TT_LineComment);
   }
+} else if (Current.is(tok::string_literal)) {
+  if (Style.isVerilog() && Contexts.back().VerilogMayBeConcatenation &&
+  Current.getPreviousNonComment() &&
+  Current.getPreviousNonComment()->isOneOf(tok::comma, tok::l_brace) &&
+  Current.getNextNonComment() &&
+  Current.getNextNonComment()->isOneOf(tok::comma, tok::r_brace)) {
+Current.setType(TT_VerilogStringInConcatenation);
+  }
 } else if (Current.is(tok::l_paren)) {
   if (lParenStartsCppCast(Current))
 Current.setType(TT_CppCastLParen);
Index: clang/lib/Format/FormatToken.h
===
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -163,6 +163,8 @@
   

[PATCH] D154093: [clang-format] Break long strings in Verilog

2023-07-01 Thread Björn Schäpers via Phabricator via cfe-commits
HazardyKnusperkeks added a comment.

I'd really prefer you put what you need to modify `mutable` instead of removing 
the `const` from everything else. But that's just my opinion.




Comment at: clang/include/clang/Format/Format.h:1905
+  ///
+  /// In C:
   /// \code

It definitely works in C++, and I'd suspect C# and Java too.



Comment at: clang/lib/Format/BreakableToken.h:310
+  // The braces along with the quotes they replace.  Depending on the style.
+  const StringRef LeftBraceQuote, RightBraceQuote;
+  // Width added to the left.




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D154093/new/

https://reviews.llvm.org/D154093

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits